xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6dc1ffa334a818f0c04a492523779f41fa89acef)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
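   Example Usage (a minimal sketch; comm, m, n, M, N, d_nz, and o_nz are illustrative names for the communicator,
   sizes, and per-row preallocation values, not identifiers defined in this file):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);              /* used when comm has a single process */
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);    /* used when comm has multiple processes */
.ve
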
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically
23    switches over to use inodes when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
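   Example Usage (a minimal sketch; A is assumed to be an already created Mat):
.vb
   MatSetType(A,MATAIJCRL);
.ve
   or select the type at run time by calling MatSetFromOptions(A) with -mat_type aijcrl on the command line.
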
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
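/*
   A minimal calling sketch (hypothetical caller; gmat is only referenced on rank 0, and m is the number of
   rows this process should own in the distributed matrix):

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);
*/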
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal nonzeros */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal nonzeros */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403   number to the local number in the off-diagonal part of the local
404   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
405   at a slightly higher hash-table cost; without it, it is not scalable (each
406   process stores an order N integer array) but access is fast.
407 */
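/*
   A small worked example (illustrative values only): if the off-diagonal part B on this process has
   garray = {3, 17, 42}, then global column 17 is stored as local column 1 of B, and the colmap built
   below records the value 1+1 = 2 for global column 17.  Lookups subtract one again, so a result of
   -1 (a stored value of 0) means that global column is not present in B on this process.
*/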
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
637     The values in mat_i have to be sorted, and the values in mat_j have to be sorted within each row (CSR-like).
638     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
677     The values in mat_i have to be sorted, and the values in mat_j have to be sorted within each row (CSR-like).
678     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
679 */
680 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[],
681                                                      const PetscInt full_diag_i[],const PetscInt full_offd_i[])
682 {
683   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
684   Mat            A      = aij->A; /* diagonal part of the matrix */
685   Mat            B      = aij->B; /* offdiagonal part of the matrix */
686   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
687   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
688   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
689   PetscInt       *ailen = a->ilen,*aj = a->j;
690   PetscInt       *bilen = b->ilen,*bj = b->j;
691   PetscInt       am     = aij->A->rmap->n,j;
692   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
693   PetscScalar    *aa = a->a,*ba = b->a;
694 
695   PetscFunctionBegin;
696   /* Iterate over all rows of the matrix */
697   for (j=0; j<am; j++) {
698     dnz_row = onz_row = 0;
699     /*  Iterate over all non-zero columns of the current row */
700     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
701       rowstart_offd = full_offd_i[j];
702       rowstart_diag = full_diag_i[j];
703       /* If column is in the diagonal */
704       if (mat_j[col] >= cstart && mat_j[col] < cend) {
705         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
706         aa[rowstart_diag+dnz_row] = mat_a[col];
707         dnz_row++;
708       } else { /* off-diagonal entries */
709         bj[rowstart_offd+onz_row] = mat_j[col];
710         ba[rowstart_offd+onz_row] = mat_a[col];
711         onz_row++;
712       }
713     }
714     ailen[j] = dnz_row;
715     bilen[j] = onz_row;
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
721 {
722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
723   PetscErrorCode ierr;
724   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
725   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
726 
727   PetscFunctionBegin;
728   for (i=0; i<m; i++) {
729     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
730     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
731     if (idxm[i] >= rstart && idxm[i] < rend) {
732       row = idxm[i] - rstart;
733       for (j=0; j<n; j++) {
734         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
735         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
736         if (idxn[j] >= cstart && idxn[j] < cend) {
737           col  = idxn[j] - cstart;
738           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
739         } else {
740           if (!aij->colmap) {
741             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
742           }
743 #if defined(PETSC_USE_CTABLE)
744           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
745           col--;
746 #else
747           col = aij->colmap[idxn[j]] - 1;
748 #endif
749           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
750           else {
751             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
752           }
753         }
754       }
755     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
761 
762 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
763 {
764   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
765   PetscErrorCode ierr;
766   PetscInt       nstash,reallocs;
767 
768   PetscFunctionBegin;
769   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
770 
771   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
772   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
773   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
778 {
779   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
780   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
781   PetscErrorCode ierr;
782   PetscMPIInt    n;
783   PetscInt       i,j,rstart,ncols,flg;
784   PetscInt       *row,*col;
785   PetscBool      other_disassembled;
786   PetscScalar    *val;
787 
788   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
789 
790   PetscFunctionBegin;
791   if (!aij->donotstash && !mat->nooffprocentries) {
792     while (1) {
793       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
794       if (!flg) break;
795 
796       for (i=0; i<n; ) {
797         /* Now identify the consecutive vals belonging to the same row */
798         for (j=i,rstart=row[j]; j<n; j++) {
799           if (row[j] != rstart) break;
800         }
801         if (j < n) ncols = j-i;
802         else       ncols = n-i;
803         /* Now assemble all these values with a single function call */
804         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
805 
806         i = j;
807       }
808     }
809     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
810   }
811   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
812   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
813 
814   /* determine if any processor has disassembled, if so we must
815      also disassemble ourselves, in order that we may reassemble. */
816   /*
817      if nonzero structure of submatrix B cannot change then we know that
818      no processor disassembled thus we can skip this stuff
819   */
820   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
821     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
822     if (mat->was_assembled && !other_disassembled) {
823       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
824     }
825   }
826   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
827     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
828   }
829   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
830   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
831   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
832 
833   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
834 
835   aij->rowvalues = 0;
836 
837   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
838   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
839 
840   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
841   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
842     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
843     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
849 {
850   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
851   PetscErrorCode ierr;
852 
853   PetscFunctionBegin;
854   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
855   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
860 {
861   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
862   PetscInt      *lrows;
863   PetscInt       r, len;
864   PetscBool      cong;
865   PetscErrorCode ierr;
866 
867   PetscFunctionBegin;
868   /* get locally owned rows */
869   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
870   /* fix right hand side if needed */
871   if (x && b) {
872     const PetscScalar *xx;
873     PetscScalar       *bb;
874 
875     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
876     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
877     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
878     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
880   }
881   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
882   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
883   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
884   if ((diag != 0.0) && cong) {
885     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
886   } else if (diag != 0.0) {
887     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
888     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
889     for (r = 0; r < len; ++r) {
890       const PetscInt row = lrows[r] + A->rmap->rstart;
891       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
892     }
893     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
894     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895   } else {
896     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(lrows);CHKERRQ(ierr);
899 
900   /* only change matrix nonzero state if pattern was allowed to be changed */
901   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
902     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
903     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
904   }
905   PetscFunctionReturn(0);
906 }
907 
908 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
909 {
910   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
911   PetscErrorCode    ierr;
912   PetscMPIInt       n = A->rmap->n;
913   PetscInt          i,j,r,m,p = 0,len = 0;
914   PetscInt          *lrows,*owners = A->rmap->range;
915   PetscSFNode       *rrows;
916   PetscSF           sf;
917   const PetscScalar *xx;
918   PetscScalar       *bb,*mask;
919   Vec               xmask,lmask;
920   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
921   const PetscInt    *aj, *ii,*ridx;
922   PetscScalar       *aa;
923 
924   PetscFunctionBegin;
925   /* Create SF where leaves are input rows and roots are owned rows */
926   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
927   for (r = 0; r < n; ++r) lrows[r] = -1;
928   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
929   for (r = 0; r < N; ++r) {
930     const PetscInt idx   = rows[r];
931     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
932     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
933       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
934     }
935     rrows[r].rank  = p;
936     rrows[r].index = rows[r] - owners[p];
937   }
938   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
939   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
940   /* Collect flags for rows to be zeroed */
941   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
942   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
943   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
944   /* Compress and put in row numbers */
945   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
946   /* zero diagonal part of matrix */
947   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
948   /* handle off diagonal part of matrix */
949   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
950   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
951   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
952   for (i=0; i<len; i++) bb[lrows[i]] = 1;
953   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
954   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
957   if (x) {
958     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
961     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
962   }
963   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
964   /* remove zeroed rows of off diagonal matrix */
965   ii = aij->i;
966   for (i=0; i<len; i++) {
967     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
968   }
969   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
970   if (aij->compressedrow.use) {
971     m    = aij->compressedrow.nrows;
972     ii   = aij->compressedrow.i;
973     ridx = aij->compressedrow.rindex;
974     for (i=0; i<m; i++) {
975       n  = ii[i+1] - ii[i];
976       aj = aij->j + ii[i];
977       aa = aij->a + ii[i];
978 
979       for (j=0; j<n; j++) {
980         if (PetscAbsScalar(mask[*aj])) {
981           if (b) bb[*ridx] -= *aa*xx[*aj];
982           *aa = 0.0;
983         }
984         aa++;
985         aj++;
986       }
987       ridx++;
988     }
989   } else { /* do not use compressed row format */
990     m = l->B->rmap->n;
991     for (i=0; i<m; i++) {
992       n  = ii[i+1] - ii[i];
993       aj = aij->j + ii[i];
994       aa = aij->a + ii[i];
995       for (j=0; j<n; j++) {
996         if (PetscAbsScalar(mask[*aj])) {
997           if (b) bb[i] -= *aa*xx[*aj];
998           *aa = 0.0;
999         }
1000         aa++;
1001         aj++;
1002       }
1003     }
1004   }
1005   if (x) {
1006     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1007     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1008   }
1009   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1010   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1011   ierr = PetscFree(lrows);CHKERRQ(ierr);
1012 
1013   /* only change matrix nonzero state if pattern was allowed to be changed */
1014   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1015     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1016     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1022 {
1023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1024   PetscErrorCode ierr;
1025   PetscInt       nt;
1026   VecScatter     Mvctx = a->Mvctx;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1030   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1031 
1032   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1034   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1040 {
1041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1042   PetscErrorCode ierr;
1043 
1044   PetscFunctionBegin;
1045   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1050 {
1051   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1052   PetscErrorCode ierr;
1053   VecScatter     Mvctx = a->Mvctx;
1054 
1055   PetscFunctionBegin;
1056   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1057   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1059   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1060   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1065 {
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067   PetscErrorCode ierr;
1068   PetscBool      merged;
1069 
1070   PetscFunctionBegin;
1071   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1072   /* do nondiagonal part */
1073   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1074   if (!merged) {
1075     /* send it on its way */
1076     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077     /* do local part */
1078     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1079     /* receive remote parts: note this assumes the values are not actually */
1080     /* added into yy until the next line */
1081     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082   } else {
1083     /* do local part */
1084     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1085     /* send it on its way */
1086     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1087     /* values actually were received in the Begin() but we need to call this nop */
1088     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1089   }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1094 {
1095   MPI_Comm       comm;
1096   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1097   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1098   IS             Me,Notme;
1099   PetscErrorCode ierr;
1100   PetscInt       M,N,first,last,*notme,i;
1101   PetscMPIInt    size;
1102 
1103   PetscFunctionBegin;
1104   /* Easy test: symmetric diagonal block */
1105   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1106   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1107   if (!*f) PetscFunctionReturn(0);
1108   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1110   if (size == 1) PetscFunctionReturn(0);
1111 
1112   /* Hard test: off-diagonal block. This takes MatCreateSubMatrices() calls. */
1113   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1114   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1115   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1116   for (i=0; i<first; i++) notme[i] = i;
1117   for (i=last; i<M; i++) notme[i-last+first] = i;
1118   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1119   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1120   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1121   Aoff = Aoffs[0];
1122   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1123   Boff = Boffs[0];
1124   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1125   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1126   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1127   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1128   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1129   ierr = PetscFree(notme);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1134 {
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1143 {
1144   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   /* do nondiagonal part */
1149   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1150   /* send it on its way */
1151   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   /* do local part */
1153   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1154   /* receive remote parts */
1155   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 /*
1160   This only works correctly for square matrices where the subblock A->A is the
1161    diagonal block
1162 */
1163 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1164 {
1165   PetscErrorCode ierr;
1166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1167 
1168   PetscFunctionBegin;
1169   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1170   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1171   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1176 {
1177   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1182   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1187 {
1188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1194 #endif
1195   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1196   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1197   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1198   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1199 #if defined(PETSC_USE_CTABLE)
1200   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1201 #else
1202   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1203 #endif
1204   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1205   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1206   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1207   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1208   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1209   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1210   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1211 
1212   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1221 #if defined(PETSC_HAVE_ELEMENTAL)
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1223 #endif
1224 #if defined(PETSC_HAVE_HYPRE)
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1227 #endif
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1234 {
1235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1236   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1237   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1238   PetscErrorCode ierr;
1239   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1240   int            fd;
1241   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1242   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1243   PetscScalar    *column_values;
1244   PetscInt       message_count,flowcontrolcount;
1245   FILE           *file;
1246 
1247   PetscFunctionBegin;
1248   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1249   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1250   nz   = A->nz + B->nz;
1251   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1252   if (!rank) {
1253     header[0] = MAT_FILE_CLASSID;
1254     header[1] = mat->rmap->N;
1255     header[2] = mat->cmap->N;
1256 
1257     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1258     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1259     /* get largest number of rows any processor has */
1260     rlen  = mat->rmap->n;
1261     range = mat->rmap->range;
1262     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1263   } else {
1264     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265     rlen = mat->rmap->n;
1266   }
1267 
1268   /* load up the local row counts */
1269   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1270   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1271 
1272   /* store the row lengths to the file */
1273   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1274   if (!rank) {
1275     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       rlen = range[i+1] - range[i];
1279       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     }
1282     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1283   } else {
1284     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1285     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1286     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1287   }
1288   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1289 
1290   /* load up the local column indices */
1291   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1292   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1294   cnt   = 0;
1295   for (i=0; i<mat->rmap->n; i++) {
1296     for (j=B->i[i]; j<B->i[i+1]; j++) {
1297       if ((col = garray[B->j[j]]) > cstart) break;
1298       column_indices[cnt++] = col;
1299     }
1300     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1301     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1302   }
1303   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1304 
1305   /* store the column indices to the file */
1306   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1307   if (!rank) {
1308     MPI_Status status;
1309     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1310     for (i=1; i<size; i++) {
1311       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1312       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1313       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1314       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1316     }
1317     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1318   } else {
1319     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1320     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1323   }
1324   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1325 
1326   /* load up the local column values */
1327   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1328   cnt  = 0;
1329   for (i=0; i<mat->rmap->n; i++) {
1330     for (j=B->i[i]; j<B->i[i+1]; j++) {
1331       if (garray[B->j[j]] > cstart) break;
1332       column_values[cnt++] = B->a[j];
1333     }
1334     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1335     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1336   }
1337   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1338 
1339   /* store the column values to the file */
1340   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1341   if (!rank) {
1342     MPI_Status status;
1343     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1344     for (i=1; i<size; i++) {
1345       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1346       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1347       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1348       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1349       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1350     }
1351     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1352   } else {
1353     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1354     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1355     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1357   }
1358   ierr = PetscFree(column_values);CHKERRQ(ierr);
1359 
1360   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1361   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1362   PetscFunctionReturn(0);
1363 }
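
/*
   Format notes for MatView_MPIAIJ_Binary() above: rank 0 writes a header of four PetscInt
   values (MAT_FILE_CLASSID, global rows, global columns, global nonzero count), then the
   row lengths of every row, then all global column indices in ascending order per row,
   then all values, gathering each piece from the other ranks under flow control.

   A minimal usage sketch, not part of this file (assumptions: A is an already assembled
   MATMPIAIJ matrix and "matrix.dat" is a writable path):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/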
1364 
1365 #include <petscdraw.h>
1366 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1367 {
1368   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1369   PetscErrorCode    ierr;
1370   PetscMPIInt       rank = aij->rank,size = aij->size;
1371   PetscBool         isdraw,iascii,isbinary;
1372   PetscViewer       sviewer;
1373   PetscViewerFormat format;
1374 
1375   PetscFunctionBegin;
1376   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1377   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1378   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1379   if (iascii) {
1380     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1381     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1382       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1383       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1384       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1385       for (i=0; i<(PetscInt)size; i++) {
1386         nmax = PetscMax(nmax,nz[i]);
1387         nmin = PetscMin(nmin,nz[i]);
1388         navg += nz[i];
1389       }
1390       ierr = PetscFree(nz);CHKERRQ(ierr);
1391       navg = navg/size;
1392       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1393       PetscFunctionReturn(0);
1394     }
1395     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1396     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1397       MatInfo   info;
1398       PetscBool inodes;
1399 
1400       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1401       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1402       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1403       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1404       if (!inodes) {
1405         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1406                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1407       } else {
1408         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1409                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1410       }
1411       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1412       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1413       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1414       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1415       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1416       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1418       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1419       PetscFunctionReturn(0);
1420     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1421       PetscInt inodecount,inodelimit,*inodes;
1422       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1423       if (inodes) {
1424         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1425       } else {
1426         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1427       }
1428       PetscFunctionReturn(0);
1429     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1430       PetscFunctionReturn(0);
1431     }
1432   } else if (isbinary) {
1433     if (size == 1) {
1434       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1435       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1436     } else {
1437       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1438     }
1439     PetscFunctionReturn(0);
1440   } else if (isdraw) {
1441     PetscDraw draw;
1442     PetscBool isnull;
1443     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1444     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1445     if (isnull) PetscFunctionReturn(0);
1446   }
1447 
1448   {
1449     /* assemble the entire matrix onto first processor. */
1450     Mat        A;
1451     Mat_SeqAIJ *Aloc;
1452     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1453     MatScalar  *a;
1454 
1455     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1456     if (!rank) {
1457       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1458     } else {
1459       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1460     }
1461     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1462     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1463     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1464     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1465     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1466 
1467     /* copy over the A part */
1468     Aloc = (Mat_SeqAIJ*)aij->A->data;
1469     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1470     row  = mat->rmap->rstart;
1471     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1472     for (i=0; i<m; i++) {
1473       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1474       row++;
1475       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1476     }
1477     aj = Aloc->j;
1478     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1479 
1480     /* copy over the B part */
1481     Aloc = (Mat_SeqAIJ*)aij->B->data;
1482     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1483     row  = mat->rmap->rstart;
1484     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1485     ct   = cols;
1486     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1487     for (i=0; i<m; i++) {
1488       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1489       row++;
1490       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1491     }
1492     ierr = PetscFree(ct);CHKERRQ(ierr);
1493     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495     /*
1496        All processes must participate in this call since the graphics waits are
1497        synchronized across all processes that share the PetscDraw object
1498     */
1499     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1500     if (!rank) {
1501       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1502       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
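
/*
   Usage sketch for the ASCII info paths handled above (assumption: A is an assembled
   MATMPIAIJ matrix; illustrative only, not part of this file):

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The PETSC_VIEWER_LOAD_BALANCE format prints only the minimum, average, and maximum
   number of local nonzeros across the processes.
*/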
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = 0;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
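
/*
   Notes on MatSOR_MPIAIJ() above: only process-local relaxations are supported. For the
   SOR_LOCAL_* flags each sweep first scatters the ghost values of xx into lvec, forms
   bb1 = bb - B*x (the off-diagonal contribution moved to the right-hand side), and then
   runs the sequential SOR kernel of the diagonal block A on bb1. A true global parallel
   SOR is rejected with PETSC_ERR_SUP.

   This routine is normally reached through PCSOR rather than called directly. A hedged
   sketch (ksp is an assumed, already created KSP):

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
*/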
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt row = rdest[i],rowner;
1680     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1681     for (j=ai[i]; j<ai[i+1]; j++) {
1682       PetscInt cowner,col = cdest[aj[j]];
1683       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1684       if (rowner == cowner) dnnz[i]++;
1685       else onnz[i]++;
1686     }
1687     for (j=bi[i]; j<bi[i+1]; j++) {
1688       PetscInt cowner,col = gcdest[bj[j]];
1689       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1690       if (rowner == cowner) dnnz[i]++;
1691       else onnz[i]++;
1692     }
1693   }
1694   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1695   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1696   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1697   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1698   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1699 
1700   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1701   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1702   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1703   for (i=0; i<m; i++) {
1704     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1705     PetscInt j0,rowlen;
1706     rowlen = ai[i+1] - ai[i];
1707     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert the values in batches */
1708       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1709       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1710     }
1711     rowlen = bi[i+1] - bi[i];
1712     for (j0=j=0; j<rowlen; j0=j) {
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716   }
1717   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1720   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1721   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1722   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1723   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1724   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1725   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1726   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1727   *B = Aperm;
1728   PetscFunctionReturn(0);
1729 }
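
/*
   Usage sketch for MatPermute() on an MPIAIJ matrix (assumptions: A is an assembled
   matrix and rowperm/colperm are index sets describing the new global row and column
   ordering, distributed conformingly with A; illustrative only):

     Mat Ap;
     ierr = MatPermute(A,rowperm,colperm,&Ap);CHKERRQ(ierr);
*/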
1730 
1731 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1732 {
1733   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1734   PetscErrorCode ierr;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1738   if (ghosts) *ghosts = aij->garray;
1739   PetscFunctionReturn(0);
1740 }
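
/*
   Usage sketch for MatGetGhosts() on an MPIAIJ matrix (assumption: A is an assembled
   matrix; illustrative only). The returned array is the list of global column numbers
   of the off-diagonal (ghost) columns, i.e. aij->garray, and must not be freed:

     PetscInt       nghost;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);
*/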
1741 
1742 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1743 {
1744   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1745   Mat            A    = mat->A,B = mat->B;
1746   PetscErrorCode ierr;
1747   PetscReal      isend[5],irecv[5];
1748 
1749   PetscFunctionBegin;
1750   info->block_size = 1.0;
1751   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1752 
1753   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1754   isend[3] = info->memory;  isend[4] = info->mallocs;
1755 
1756   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1759   isend[3] += info->memory;  isend[4] += info->mallocs;
1760   if (flag == MAT_LOCAL) {
1761     info->nz_used      = isend[0];
1762     info->nz_allocated = isend[1];
1763     info->nz_unneeded  = isend[2];
1764     info->memory       = isend[3];
1765     info->mallocs      = isend[4];
1766   } else if (flag == MAT_GLOBAL_MAX) {
1767     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1768 
1769     info->nz_used      = irecv[0];
1770     info->nz_allocated = irecv[1];
1771     info->nz_unneeded  = irecv[2];
1772     info->memory       = irecv[3];
1773     info->mallocs      = irecv[4];
1774   } else if (flag == MAT_GLOBAL_SUM) {
1775     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1776 
1777     info->nz_used      = irecv[0];
1778     info->nz_allocated = irecv[1];
1779     info->nz_unneeded  = irecv[2];
1780     info->memory       = irecv[3];
1781     info->mallocs      = irecv[4];
1782   }
1783   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1784   info->fill_ratio_needed = 0;
1785   info->factor_mallocs    = 0;
1786   PetscFunctionReturn(0);
1787 }
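
/*
   Usage sketch for MatGetInfo() on an MPIAIJ matrix (assumption: A is an assembled
   matrix; illustrative only). MAT_LOCAL reports the sum over the two local blocks,
   while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce those sums over the communicator:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"global nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/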
1788 
1789 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1790 {
1791   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   switch (op) {
1796   case MAT_NEW_NONZERO_LOCATIONS:
1797   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1798   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1799   case MAT_KEEP_NONZERO_PATTERN:
1800   case MAT_NEW_NONZERO_LOCATION_ERR:
1801   case MAT_USE_INODES:
1802   case MAT_IGNORE_ZERO_ENTRIES:
1803     MatCheckPreallocated(A,1);
1804     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1805     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1806     break;
1807   case MAT_ROW_ORIENTED:
1808     MatCheckPreallocated(A,1);
1809     a->roworiented = flg;
1810 
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_NEW_DIAGONALS:
1815     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1816     break;
1817   case MAT_IGNORE_OFF_PROC_ENTRIES:
1818     a->donotstash = flg;
1819     break;
1820   case MAT_SPD:
1821     A->spd_set = PETSC_TRUE;
1822     A->spd     = flg;
1823     if (flg) {
1824       A->symmetric                  = PETSC_TRUE;
1825       A->structurally_symmetric     = PETSC_TRUE;
1826       A->symmetric_set              = PETSC_TRUE;
1827       A->structurally_symmetric_set = PETSC_TRUE;
1828     }
1829     break;
1830   case MAT_SYMMETRIC:
1831     MatCheckPreallocated(A,1);
1832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1833     break;
1834   case MAT_STRUCTURALLY_SYMMETRIC:
1835     MatCheckPreallocated(A,1);
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_HERMITIAN:
1839     MatCheckPreallocated(A,1);
1840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1841     break;
1842   case MAT_SYMMETRY_ETERNAL:
1843     MatCheckPreallocated(A,1);
1844     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1845     break;
1846   case MAT_SUBMAT_SINGLEIS:
1847     A->submat_singleis = flg;
1848     break;
1849   case MAT_STRUCTURE_ONLY:
1850     /* The option is handled directly by MatSetOption() */
1851     break;
1852   default:
1853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1854   }
1855   PetscFunctionReturn(0);
1856 }
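
/*
   Usage sketch for one of the options handled above (assumption: A is an MPIAIJ
   matrix; illustrative only). MAT_IGNORE_OFF_PROC_ENTRIES sets a->donotstash so values
   destined for other processes are silently dropped during assembly:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/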
1857 
1858 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1859 {
1860   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1861   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1862   PetscErrorCode ierr;
1863   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1864   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1865   PetscInt       *cmap,*idx_p;
1866 
1867   PetscFunctionBegin;
1868   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1869   mat->getrowactive = PETSC_TRUE;
1870 
1871   if (!mat->rowvalues && (idx || v)) {
1872     /*
1873         allocate enough space to hold information from the longest row.
1874     */
1875     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1876     PetscInt   max = 1,tmp;
1877     for (i=0; i<matin->rmap->n; i++) {
1878       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1879       if (max < tmp) max = tmp;
1880     }
1881     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1882   }
1883 
1884   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1885   lrow = row - rstart;
1886 
1887   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1888   if (!v)   {pvA = 0; pvB = 0;}
1889   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1890   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1891   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1892   nztot = nzA + nzB;
1893 
1894   cmap = mat->garray;
1895   if (v  || idx) {
1896     if (nztot) {
1897       /* Sort by increasing column numbers, assuming A and B already sorted */
1898       PetscInt imark = -1;
1899       if (v) {
1900         *v = v_p = mat->rowvalues;
1901         for (i=0; i<nzB; i++) {
1902           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1903           else break;
1904         }
1905         imark = i;
1906         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1907         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1908       }
1909       if (idx) {
1910         *idx = idx_p = mat->rowindices;
1911         if (imark > -1) {
1912           for (i=0; i<imark; i++) {
1913             idx_p[i] = cmap[cworkB[i]];
1914           }
1915         } else {
1916           for (i=0; i<nzB; i++) {
1917             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1918             else break;
1919           }
1920           imark = i;
1921         }
1922         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1923         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1924       }
1925     } else {
1926       if (idx) *idx = 0;
1927       if (v)   *v   = 0;
1928     }
1929   }
1930   *nz  = nztot;
1931   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1932   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
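
/*
   Usage sketch for MatGetRow()/MatRestoreRow() on an MPIAIJ matrix (assumption: A is an
   assembled matrix; illustrative only). Only locally owned rows may be requested; the
   returned column indices are global and in increasing order:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/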
1935 
1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1937 {
1938   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1939 
1940   PetscFunctionBegin;
1941   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1942   aij->getrowactive = PETSC_FALSE;
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1947 {
1948   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1949   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1950   PetscErrorCode ierr;
1951   PetscInt       i,j,cstart = mat->cmap->rstart;
1952   PetscReal      sum = 0.0;
1953   MatScalar      *v;
1954 
1955   PetscFunctionBegin;
1956   if (aij->size == 1) {
1957     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1958   } else {
1959     if (type == NORM_FROBENIUS) {
1960       v = amat->a;
1961       for (i=0; i<amat->nz; i++) {
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963       }
1964       v = bmat->a;
1965       for (i=0; i<bmat->nz; i++) {
1966         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1967       }
1968       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1969       *norm = PetscSqrtReal(*norm);
1970       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1971     } else if (type == NORM_1) { /* max column norm */
1972       PetscReal *tmp,*tmp2;
1973       PetscInt  *jj,*garray = aij->garray;
1974       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1975       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1976       *norm = 0.0;
1977       v     = amat->a; jj = amat->j;
1978       for (j=0; j<amat->nz; j++) {
1979         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1980       }
1981       v = bmat->a; jj = bmat->j;
1982       for (j=0; j<bmat->nz; j++) {
1983         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1984       }
1985       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1986       for (j=0; j<mat->cmap->N; j++) {
1987         if (tmp2[j] > *norm) *norm = tmp2[j];
1988       }
1989       ierr = PetscFree(tmp);CHKERRQ(ierr);
1990       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1991       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1992     } else if (type == NORM_INFINITY) { /* max row norm */
1993       PetscReal ntemp = 0.0;
1994       for (j=0; j<aij->A->rmap->n; j++) {
1995         v   = amat->a + amat->i[j];
1996         sum = 0.0;
1997         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1998           sum += PetscAbsScalar(*v); v++;
1999         }
2000         v = bmat->a + bmat->i[j];
2001         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2002           sum += PetscAbsScalar(*v); v++;
2003         }
2004         if (sum > ntemp) ntemp = sum;
2005       }
2006       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2007       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2008     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2009   }
2010   PetscFunctionReturn(0);
2011 }
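
/*
   The norms computed by MatNorm_MPIAIJ() above, in the usual notation:
     NORM_FROBENIUS : ||A||_F   = sqrt( sum_{i,j} |a_ij|^2 )
     NORM_1         : ||A||_1   = max_j sum_i |a_ij|   (largest column sum)
     NORM_INFINITY  : ||A||_inf = max_i sum_j |a_ij|   (largest row sum)
   The two-norm is not supported. Note that the NORM_1 path allocates two work arrays of
   the global column dimension on every process.
*/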
2012 
2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2014 {
2015   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2016   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2017   PetscErrorCode ierr;
2018   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2019   PetscInt       cstart = A->cmap->rstart,ncol;
2020   Mat            B;
2021   MatScalar      *array;
2022 
2023   PetscFunctionBegin;
2024   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2025   ai = Aloc->i; aj = Aloc->j;
2026   bi = Bloc->i; bj = Bloc->j;
2027   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2028     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2029     PetscSFNode          *oloc;
2030     PETSC_UNUSED PetscSF sf;
2031 
2032     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2033     /* compute d_nnz for preallocation */
2034     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2035     for (i=0; i<ai[ma]; i++) {
2036       d_nnz[aj[i]]++;
2037       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2038     }
2039     /* compute local off-diagonal contributions */
2040     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2041     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2042     /* map those to global */
2043     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2044     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2045     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2046     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2047     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2048     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2049     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2050 
2051     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2052     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2053     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2054     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2055     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2056     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2057   } else {
2058     B    = *matout;
2059     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2060     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2061   }
2062 
2063   /* copy over the A part */
2064   array = Aloc->a;
2065   row   = A->rmap->rstart;
2066   for (i=0; i<ma; i++) {
2067     ncol = ai[i+1]-ai[i];
2068     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2069     row++;
2070     array += ncol; aj += ncol;
2071   }
2072   aj = Aloc->j;
2073   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2074 
2075   /* copy over the B part */
2076   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2077   array = Bloc->a;
2078   row   = A->rmap->rstart;
2079   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2080   cols_tmp = cols;
2081   for (i=0; i<mb; i++) {
2082     ncol = bi[i+1]-bi[i];
2083     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2084     row++;
2085     array += ncol; cols_tmp += ncol;
2086   }
2087   ierr = PetscFree(cols);CHKERRQ(ierr);
2088 
2089   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2091   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2092     *matout = B;
2093   } else {
2094     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2095   }
2096   PetscFunctionReturn(0);
2097 }
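
/*
   Usage sketch for MatTranspose() on an MPIAIJ matrix (assumption: A is an assembled
   matrix; illustrative only). With MAT_INITIAL_MATRIX a new matrix with the transposed
   layout (A->cmap rows, A->rmap columns) is created and preallocated from the counts
   gathered above:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/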
2098 
2099 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2100 {
2101   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2102   Mat            a    = aij->A,b = aij->B;
2103   PetscErrorCode ierr;
2104   PetscInt       s1,s2,s3;
2105 
2106   PetscFunctionBegin;
2107   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2108   if (rr) {
2109     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2110     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2111     /* Overlap communication with computation. */
2112     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113   }
2114   if (ll) {
2115     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2116     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2117     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2118   }
2119   /* scale the diagonal block */
2120   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2121 
2122   if (rr) {
2123     /* Do a scatter end and then right scale the off-diagonal block */
2124     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2125     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2126   }
2127   PetscFunctionReturn(0);
2128 }
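
/*
   Usage sketch for MatDiagonalScale() (assumptions: A is an MPIAIJ matrix, l and r are
   vectors conforming to its row and column layouts; illustrative only). The operation
   performed is A <- diag(l) * A * diag(r); the scatter of r into the ghost vector is
   overlapped with the left scaling of the off-diagonal block:

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/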
2129 
2130 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2131 {
2132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2141 {
2142   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2143   Mat            a,b,c,d;
2144   PetscBool      flg;
2145   PetscErrorCode ierr;
2146 
2147   PetscFunctionBegin;
2148   a = matA->A; b = matA->B;
2149   c = matB->A; d = matB->B;
2150 
2151   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2152   if (flg) {
2153     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2154   }
2155   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2160 {
2161   PetscErrorCode ierr;
2162   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2163   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2164 
2165   PetscFunctionBegin;
2166   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2167   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2168     /* because of the column compression in the off-processor part of the matrix a->B,
2169        the number of columns in a->B and b->B may be different, hence we cannot call
2170        the MatCopy() directly on the two parts. If need be, we can provide a more
2171        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2172        then copying the submatrices */
2173     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2174   } else {
2175     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2176     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2177   }
2178   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2183 {
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 /*
2192    Computes the number of nonzeros per row needed for preallocation when X and Y
2193    have different nonzero structure.
2194 */
2195 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2196 {
2197   PetscInt       i,j,k,nzx,nzy;
2198 
2199   PetscFunctionBegin;
2200   /* Set the number of nonzeros in the new matrix */
2201   for (i=0; i<m; i++) {
2202     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2203     nzx = xi[i+1] - xi[i];
2204     nzy = yi[i+1] - yi[i];
2205     nnz[i] = 0;
2206     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2207       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2208       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2209       nnz[i]++;
2210     }
2211     for (; k<nzy; k++) nnz[i]++;
2212   }
2213   PetscFunctionReturn(0);
2214 }
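
/*
   Worked example of the merge count above (hypothetical data): if row i of X has global
   columns {0,3,7} and row i of Y has global columns {3,4}, the merged row has columns
   {0,3,4,7}, so nnz[i] = 4; the shared column 3 is counted only once.
*/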
2215 
2216 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2217 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2218 {
2219   PetscErrorCode ierr;
2220   PetscInt       m = Y->rmap->N;
2221   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2222   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2223 
2224   PetscFunctionBegin;
2225   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2230 {
2231   PetscErrorCode ierr;
2232   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2233   PetscBLASInt   bnz,one=1;
2234   Mat_SeqAIJ     *x,*y;
2235 
2236   PetscFunctionBegin;
2237   if (str == SAME_NONZERO_PATTERN) {
2238     PetscScalar alpha = a;
2239     x    = (Mat_SeqAIJ*)xx->A->data;
2240     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2241     y    = (Mat_SeqAIJ*)yy->A->data;
2242     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2243     x    = (Mat_SeqAIJ*)xx->B->data;
2244     y    = (Mat_SeqAIJ*)yy->B->data;
2245     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2246     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2247     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2248   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2249     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2250   } else {
2251     Mat      B;
2252     PetscInt *nnz_d,*nnz_o;
2253     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2254     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2255     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2256     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2257     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2258     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2259     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2260     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2261     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2262     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2263     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2264     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2265     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2266     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2267   }
2268   PetscFunctionReturn(0);
2269 }
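
/*
   Usage sketch for MatAXPY() (assumptions: X and Y are MPIAIJ matrices of the same
   sizes; illustrative only). With SAME_NONZERO_PATTERN the update is two BLAS axpy
   calls on the stored values; with DIFFERENT_NONZERO_PATTERN a new matrix is
   preallocated from the merged pattern and replaces Y:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/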
2270 
2271 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2272 
2273 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2274 {
2275 #if defined(PETSC_USE_COMPLEX)
2276   PetscErrorCode ierr;
2277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2281   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2282 #else
2283   PetscFunctionBegin;
2284 #endif
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2295   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2306   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2311 {
2312   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2313   PetscErrorCode ierr;
2314   PetscInt       i,*idxb = 0;
2315   PetscScalar    *va,*vb;
2316   Vec            vtmp;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2320   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2321   if (idx) {
2322     for (i=0; i<A->rmap->n; i++) {
2323       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2324     }
2325   }
2326 
2327   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2328   if (idx) {
2329     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2330   }
2331   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2332   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2333 
2334   for (i=0; i<A->rmap->n; i++) {
2335     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2336       va[i] = vb[i];
2337       if (idx) idx[i] = a->garray[idxb[i]];
2338     }
2339   }
2340 
2341   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2342   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2343   ierr = PetscFree(idxb);CHKERRQ(ierr);
2344   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++) {
2373     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   ierr = PetscFree(idxb);CHKERRQ(ierr);
2382   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2389   PetscInt       n      = A->rmap->n;
2390   PetscInt       cstart = A->cmap->rstart;
2391   PetscInt       *cmap  = mat->garray;
2392   PetscInt       *diagIdx, *offdiagIdx;
2393   Vec            diagV, offdiagV;
2394   PetscScalar    *a, *diagA, *offdiagA;
2395   PetscInt       r;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2402   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2403   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   for (r = 0; r < n; ++r) {
2408     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       idx[r] = cstart + diagIdx[r];
2411     } else {
2412       a[r]   = offdiagA[r];
2413       idx[r] = cmap[offdiagIdx[r]];
2414     }
2415   }
2416   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2426 {
2427   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2428   PetscInt       n      = A->rmap->n;
2429   PetscInt       cstart = A->cmap->rstart;
2430   PetscInt       *cmap  = mat->garray;
2431   PetscInt       *diagIdx, *offdiagIdx;
2432   Vec            diagV, offdiagV;
2433   PetscScalar    *a, *diagA, *offdiagA;
2434   PetscInt       r;
2435   PetscErrorCode ierr;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2440   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2441   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2442   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2443   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2444   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2445   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   for (r = 0; r < n; ++r) {
2447     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2448       a[r]   = diagA[r];
2449       idx[r] = cstart + diagIdx[r];
2450     } else {
2451       a[r]   = offdiagA[r];
2452       idx[r] = cmap[offdiagIdx[r]];
2453     }
2454   }
2455   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2456   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2457   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2458   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2459   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2460   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2465 {
2466   PetscErrorCode ierr;
2467   Mat            *dummy;
2468 
2469   PetscFunctionBegin;
2470   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2471   *newmat = *dummy;
2472   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2477 {
2478   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2479   PetscErrorCode ierr;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2483   A->factorerrortype = a->A->factorerrortype;
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2488 {
2489   PetscErrorCode ierr;
2490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2491 
2492   PetscFunctionBegin;
2493   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2494   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2495   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2496   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2501 {
2502   PetscFunctionBegin;
2503   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2504   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 /*@
2509    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2510 
2511    Collective on Mat
2512 
2513    Input Parameters:
2514 +    A - the matrix
2515 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2516 
2517    Level: advanced
2518 
2519 @*/
2520 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2521 {
2522   PetscErrorCode       ierr;
2523 
2524   PetscFunctionBegin;
2525   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2526   PetscFunctionReturn(0);
2527 }
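
/*
   Usage sketch (assumption: A is an MPIAIJ matrix; illustrative only). The same switch
   is also reachable from the options database via -mat_increase_overlap_scalable,
   handled in MatSetFromOptions_MPIAIJ() below:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
*/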
2528 
2529 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2530 {
2531   PetscErrorCode       ierr;
2532   PetscBool            sc = PETSC_FALSE,flg;
2533 
2534   PetscFunctionBegin;
2535   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2536   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2537   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2538   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2539   if (flg) {
2540     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2541   }
2542   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2547 {
2548   PetscErrorCode ierr;
2549   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2550   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2551 
2552   PetscFunctionBegin;
2553   if (!Y->preallocated) {
2554     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2555   } else if (!aij->nz) {
2556     PetscInt nonew = aij->nonew;
2557     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2558     aij->nonew = nonew;
2559   }
2560   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2565 {
2566   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2571   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2572   if (d) {
2573     PetscInt rstart;
2574     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2575     *d += rstart;
2576 
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584                                        MatGetRow_MPIAIJ,
2585                                        MatRestoreRow_MPIAIJ,
2586                                        MatMult_MPIAIJ,
2587                                 /* 4*/ MatMultAdd_MPIAIJ,
2588                                        MatMultTranspose_MPIAIJ,
2589                                        MatMultTransposeAdd_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                 /*10*/ 0,
2594                                        0,
2595                                        0,
2596                                        MatSOR_MPIAIJ,
2597                                        MatTranspose_MPIAIJ,
2598                                 /*15*/ MatGetInfo_MPIAIJ,
2599                                        MatEqual_MPIAIJ,
2600                                        MatGetDiagonal_MPIAIJ,
2601                                        MatDiagonalScale_MPIAIJ,
2602                                        MatNorm_MPIAIJ,
2603                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2604                                        MatAssemblyEnd_MPIAIJ,
2605                                        MatSetOption_MPIAIJ,
2606                                        MatZeroEntries_MPIAIJ,
2607                                 /*24*/ MatZeroRows_MPIAIJ,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                 /*29*/ MatSetUp_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        MatGetDiagonalBlock_MPIAIJ,
2616                                        0,
2617                                 /*34*/ MatDuplicate_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                 /*39*/ MatAXPY_MPIAIJ,
2623                                        MatCreateSubMatrices_MPIAIJ,
2624                                        MatIncreaseOverlap_MPIAIJ,
2625                                        MatGetValues_MPIAIJ,
2626                                        MatCopy_MPIAIJ,
2627                                 /*44*/ MatGetRowMax_MPIAIJ,
2628                                        MatScale_MPIAIJ,
2629                                        MatShift_MPIAIJ,
2630                                        MatDiagonalSet_MPIAIJ,
2631                                        MatZeroRowsColumns_MPIAIJ,
2632                                 /*49*/ MatSetRandom_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2638                                        0,
2639                                        MatSetUnfactored_MPIAIJ,
2640                                        MatPermute_MPIAIJ,
2641                                        0,
2642                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2643                                        MatDestroy_MPIAIJ,
2644                                        MatView_MPIAIJ,
2645                                        0,
2646                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2653                                        MatGetRowMinAbs_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*75*/ MatFDColoringApply_AIJ,
2659                                        MatSetFromOptions_MPIAIJ,
2660                                        0,
2661                                        0,
2662                                        MatFindZeroDiagonals_MPIAIJ,
2663                                 /*80*/ 0,
2664                                        0,
2665                                        0,
2666                                 /*83*/ MatLoad_MPIAIJ,
2667                                        MatIsSymmetric_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2673                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2674                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2675                                        MatPtAP_MPIAIJ_MPIAIJ,
2676                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2677                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                 /*99*/ 0,
2683                                        0,
2684                                        0,
2685                                        MatConjugate_MPIAIJ,
2686                                        0,
2687                                 /*104*/MatSetValuesRow_MPIAIJ,
2688                                        MatRealPart_MPIAIJ,
2689                                        MatImaginaryPart_MPIAIJ,
2690                                        0,
2691                                        0,
2692                                 /*109*/0,
2693                                        0,
2694                                        MatGetRowMin_MPIAIJ,
2695                                        0,
2696                                        MatMissingDiagonal_MPIAIJ,
2697                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2698                                        0,
2699                                        MatGetGhosts_MPIAIJ,
2700                                        0,
2701                                        0,
2702                                 /*119*/0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        MatGetMultiProcBlock_MPIAIJ,
2707                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2708                                        MatGetColumnNorms_MPIAIJ,
2709                                        MatInvertBlockDiagonal_MPIAIJ,
2710                                        0,
2711                                        MatCreateSubMatricesMPI_MPIAIJ,
2712                                 /*129*/0,
2713                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2714                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2715                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2716                                        0,
2717                                 /*134*/0,
2718                                        0,
2719                                        MatRARt_MPIAIJ_MPIAIJ,
2720                                        0,
2721                                        0,
2722                                 /*139*/MatSetBlockSizes_MPIAIJ,
2723                                        0,
2724                                        0,
2725                                        MatFDColoringSetUp_MPIXAIJ,
2726                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2727                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2728 };
2729 
2730 /* ----------------------------------------------------------------------------------------*/
2731 
2732 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2733 {
2734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2735   PetscErrorCode ierr;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2739   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2744 {
2745   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2746   PetscErrorCode ierr;
2747 
2748   PetscFunctionBegin;
2749   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2750   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
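/*
   A minimal usage sketch (not part of this file) for the two routines above, which forward MatStoreValues()
   and MatRetrieveValues() to the diagonal (A) and off-diagonal (B) sequential blocks.  The pattern assumes
   the nonzero structure of mat does not change between uses:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        ... modify the numerical values of mat and use it ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/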
2754 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2755 {
2756   Mat_MPIAIJ     *b;
2757   PetscErrorCode ierr;
2758 
2759   PetscFunctionBegin;
2760   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2761   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2762   b = (Mat_MPIAIJ*)B->data;
2763 
2764 #if defined(PETSC_USE_CTABLE)
2765   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2766 #else
2767   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2768 #endif
2769   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2770   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2771   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2772 
2773   /* Because B may have been resized, we simply destroy it and create a new one each time */
2774   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2775   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2776   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2777   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2778   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2779   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2780 
2781   if (!B->preallocated) {
2782     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2783     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2784     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2785     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2786     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2787   }
2788 
2789   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2790   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2791   B->preallocated  = PETSC_TRUE;
2792   B->was_assembled = PETSC_FALSE;
2793   B->assembled     = PETSC_FALSE;
2794   PetscFunctionReturn(0);
2795 }
2796 
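/*
   A minimal usage sketch (not part of this file) of the public MatMPIAIJSetPreallocation() interface that
   ends up in the routine above; the global sizes M and N and the per-row estimates 5 (diagonal block) and
   2 (off-diagonal block) are only assumed example values:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/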
2797 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2798 {
2799   Mat_MPIAIJ     *b;
2800   PetscErrorCode ierr;
2801 
2802   PetscFunctionBegin;
2803   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2804   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2806   b = (Mat_MPIAIJ*)B->data;
2807 
2808 #if defined(PETSC_USE_CTABLE)
2809   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2810 #else
2811   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2812 #endif
2813   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2814   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2815   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2816 
2817   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2818   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2819   B->preallocated  = PETSC_TRUE;
2820   B->was_assembled = PETSC_FALSE;
2821   B->assembled = PETSC_FALSE;
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2826 {
2827   Mat            mat;
2828   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2829   PetscErrorCode ierr;
2830 
2831   PetscFunctionBegin;
2832   *newmat = 0;
2833   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2834   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2835   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2836   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2837   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2838   a       = (Mat_MPIAIJ*)mat->data;
2839 
2840   mat->factortype   = matin->factortype;
2841   mat->assembled    = PETSC_TRUE;
2842   mat->insertmode   = NOT_SET_VALUES;
2843   mat->preallocated = PETSC_TRUE;
2844 
2845   a->size         = oldmat->size;
2846   a->rank         = oldmat->rank;
2847   a->donotstash   = oldmat->donotstash;
2848   a->roworiented  = oldmat->roworiented;
2849   a->rowindices   = 0;
2850   a->rowvalues    = 0;
2851   a->getrowactive = PETSC_FALSE;
2852 
2853   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2854   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2855 
2856   if (oldmat->colmap) {
2857 #if defined(PETSC_USE_CTABLE)
2858     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2859 #else
2860     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2861     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2862     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2863 #endif
2864   } else a->colmap = 0;
2865   if (oldmat->garray) {
2866     PetscInt len;
2867     len  = oldmat->B->cmap->n;
2868     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2869     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2870     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2871   } else a->garray = 0;
2872 
2873   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2874   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2875   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2876   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2877 
2878   if (oldmat->Mvctx_mpi1) {
2879     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2880     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2881   }
2882 
2883   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2884   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2885   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2886   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2887   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2888   *newmat = mat;
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2893 {
2894   PetscScalar    *vals,*svals;
2895   MPI_Comm       comm;
2896   PetscErrorCode ierr;
2897   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2898   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2899   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2900   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2901   PetscInt       cend,cstart,n,*rowners;
2902   int            fd;
2903   PetscInt       bs = newMat->rmap->bs;
2904 
2905   PetscFunctionBegin;
2906   /* force binary viewer to load .info file if it has not yet done so */
2907   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2908   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2910   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2911   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2912   if (!rank) {
2913     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2914     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2915     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2916   }
2917 
2918   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2919   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2920   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2921   if (bs < 0) bs = 1;
2922 
2923   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2924   M    = header[1]; N = header[2];
2925 
2926   /* If global sizes are set, check if they are consistent with that given in the file */
2927   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2928   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2929 
2930   /* determine ownership of all (block) rows */
2931   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2932   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2933   else m = newMat->rmap->n; /* Set by user */
2934 
2935   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2936   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2937 
2938   /* First process needs enough room for process with most rows */
2939   if (!rank) {
2940     mmax = rowners[1];
2941     for (i=2; i<=size; i++) {
2942       mmax = PetscMax(mmax, rowners[i]);
2943     }
2944   } else mmax = -1;             /* unused, but compilers complain */
2945 
2946   rowners[0] = 0;
2947   for (i=2; i<=size; i++) {
2948     rowners[i] += rowners[i-1];
2949   }
2950   rstart = rowners[rank];
2951   rend   = rowners[rank+1];
2952 
2953   /* distribute row lengths to all processors */
2954   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2955   if (!rank) {
2956     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2957     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2958     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2959     for (j=0; j<m; j++) {
2960       procsnz[0] += ourlens[j];
2961     }
2962     for (i=1; i<size; i++) {
2963       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2964       /* calculate the number of nonzeros on each processor */
2965       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2966         procsnz[i] += rowlengths[j];
2967       }
2968       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2969     }
2970     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2971   } else {
2972     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2973   }
2974 
2975   if (!rank) {
2976     /* determine max buffer needed and allocate it */
2977     maxnz = 0;
2978     for (i=0; i<size; i++) {
2979       maxnz = PetscMax(maxnz,procsnz[i]);
2980     }
2981     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2982 
2983     /* read in my part of the matrix column indices  */
2984     nz   = procsnz[0];
2985     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2986     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2987 
2988     /* read in everyone else's and ship it off */
2989     for (i=1; i<size; i++) {
2990       nz   = procsnz[i];
2991       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2992       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2993     }
2994     ierr = PetscFree(cols);CHKERRQ(ierr);
2995   } else {
2996     /* determine buffer space needed for message */
2997     nz = 0;
2998     for (i=0; i<m; i++) {
2999       nz += ourlens[i];
3000     }
3001     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3002 
3003     /* receive message of column indices */
3004     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3005   }
3006 
3007   /* determine column ownership if matrix is not square */
3008   if (N != M) {
3009     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3010     else n = newMat->cmap->n;
3011     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3012     cstart = cend - n;
3013   } else {
3014     cstart = rstart;
3015     cend   = rend;
3016     n      = cend - cstart;
3017   }
3018 
3019   /* loop over local rows, determining the number of off-diagonal entries */
3020   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3021   jj   = 0;
3022   for (i=0; i<m; i++) {
3023     for (j=0; j<ourlens[i]; j++) {
3024       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3025       jj++;
3026     }
3027   }
3028 
3029   for (i=0; i<m; i++) {
3030     ourlens[i] -= offlens[i];
3031   }
3032   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3033 
3034   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3035 
3036   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3037 
3038   for (i=0; i<m; i++) {
3039     ourlens[i] += offlens[i];
3040   }
3041 
3042   if (!rank) {
3043     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3044 
3045     /* read in my part of the matrix numerical values  */
3046     nz   = procsnz[0];
3047     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3048 
3049     /* insert into matrix */
3050     jj      = rstart;
3051     smycols = mycols;
3052     svals   = vals;
3053     for (i=0; i<m; i++) {
3054       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3055       smycols += ourlens[i];
3056       svals   += ourlens[i];
3057       jj++;
3058     }
3059 
3060     /* read in other processors and ship out */
3061     for (i=1; i<size; i++) {
3062       nz   = procsnz[i];
3063       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3064       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3065     }
3066     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3067   } else {
3068     /* receive numeric values */
3069     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3070 
3071     /* receive message of values */
3072     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3073 
3074     /* insert into matrix */
3075     jj      = rstart;
3076     smycols = mycols;
3077     svals   = vals;
3078     for (i=0; i<m; i++) {
3079       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3080       smycols += ourlens[i];
3081       svals   += ourlens[i];
3082       jj++;
3083     }
3084   }
3085   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3086   ierr = PetscFree(vals);CHKERRQ(ierr);
3087   ierr = PetscFree(mycols);CHKERRQ(ierr);
3088   ierr = PetscFree(rowners);CHKERRQ(ierr);
3089   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 
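/*
   A minimal usage sketch (not part of this file): loading a MATMPIAIJ matrix from a PETSc binary file with
   MatLoad(), which dispatches to MatLoad_MPIAIJ() above; the file name "matrix.dat" is only an assumed example:

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/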
3094 /* Not scalable because of ISAllGather() unless getting all columns. */
3095 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3096 {
3097   PetscErrorCode ierr;
3098   IS             iscol_local;
3099   PetscBool      isstride;
3100   PetscMPIInt    lisstride=0,gisstride;
3101 
3102   PetscFunctionBegin;
3103   /* check if we are grabbing all columns */
3104   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3105 
3106   if (isstride) {
3107     PetscInt  start,len,mstart,mlen;
3108     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3109     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3110     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3111     if (mstart == start && mlen-mstart == len) lisstride = 1;
3112   }
3113 
3114   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3115   if (gisstride) {
3116     PetscInt N;
3117     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3118     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3119     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3120     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3121   } else {
3122     PetscInt cbs;
3123     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3124     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3125     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3126   }
3127 
3128   *isseq = iscol_local;
3129   PetscFunctionReturn(0);
3130 }
3131 
3132 /*
3133  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3134  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3135 
3136  Input Parameters:
3137    mat - matrix
3138    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3139            i.e., mat->rstart <= isrow[i] < mat->rend
3140    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3141            i.e., mat->cstart <= iscol[i] < mat->cend
3142  Output Parameters:
3143    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3144    iscol_o - sequential column index set for retrieving mat->B
3145    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3146  */
3147 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3148 {
3149   PetscErrorCode ierr;
3150   Vec            x,cmap;
3151   const PetscInt *is_idx;
3152   PetscScalar    *xarray,*cmaparray;
3153   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3154   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3155   Mat            B=a->B;
3156   Vec            lvec=a->lvec,lcmap;
3157   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3158   MPI_Comm       comm;
3159   VecScatter     Mvctx=a->Mvctx;
3160 
3161   PetscFunctionBegin;
3162   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3163   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3164 
3165   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3166   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3167   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3168   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3169   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3170 
3171   /* Get start indices */
3172   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3173   isstart -= ncols;
3174   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3175 
3176   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3177   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3178   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3179   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3180   for (i=0; i<ncols; i++) {
3181     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3182     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3183     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3184   }
3185   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3186   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3187   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3188 
3189   /* Get iscol_d */
3190   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3191   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3192   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3193 
3194   /* Get isrow_d */
3195   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3196   rstart = mat->rmap->rstart;
3197   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3198   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3199   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3200   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3201 
3202   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3203   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3204   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3205 
3206   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3207   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3208   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3209 
3210   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3211 
3212   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3213   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3214 
3215   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3216   /* off-process column indices */
3217   count = 0;
3218   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3219   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3220 
3221   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3222   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3223   for (i=0; i<Bn; i++) {
3224     if (PetscRealPart(xarray[i]) > -1.0) {
3225       idx[count]     = i;                   /* local column index in off-diagonal part B */
3226       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3227       count++;
3228     }
3229   }
3230   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3231   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3232 
3233   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3234   /* cannot ensure iscol_o has same blocksize as iscol! */
3235 
3236   ierr = PetscFree(idx);CHKERRQ(ierr);
3237   *garray = cmap1;
3238 
3239   ierr = VecDestroy(&x);CHKERRQ(ierr);
3240   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3241   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3246 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3247 {
3248   PetscErrorCode ierr;
3249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3250   Mat            M = NULL;
3251   MPI_Comm       comm;
3252   IS             iscol_d,isrow_d,iscol_o;
3253   Mat            Asub = NULL,Bsub = NULL;
3254   PetscInt       n;
3255 
3256   PetscFunctionBegin;
3257   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3258 
3259   if (call == MAT_REUSE_MATRIX) {
3260     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3261     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3262     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3263 
3264     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3265     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3266 
3267     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3268     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3269 
3270     /* Update diagonal and off-diagonal portions of submat */
3271     asub = (Mat_MPIAIJ*)(*submat)->data;
3272     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3273     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3274     if (n) {
3275       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3276     }
3277     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279 
3280   } else { /* call == MAT_INITIAL_MATRIX */
3281     const PetscInt *garray;
3282     PetscInt        BsubN;
3283 
3284     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3285     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3286 
3287     /* Create local submatrices Asub and Bsub */
3288     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3289     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3290 
3291     /* Create submatrix M */
3292     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3293 
3294     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3295     asub = (Mat_MPIAIJ*)M->data;
3296 
3297     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3298     n = asub->B->cmap->N;
3299     if (BsubN > n) {
3300       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3301       const PetscInt *idx;
3302       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3303       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3304 
3305       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3306       j = 0;
3307       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3308       for (i=0; i<n; i++) {
3309         if (j >= BsubN) break;
3310         while (subgarray[i] > garray[j]) j++;
3311 
3312         if (subgarray[i] == garray[j]) {
3313           idx_new[i] = idx[j++];
3314         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3315       }
3316       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3317 
3318       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3319       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3320 
3321     } else if (BsubN < n) {
3322       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3323     }
3324 
3325     ierr = PetscFree(garray);CHKERRQ(ierr);
3326     *submat = M;
3327 
3328     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3329     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3330     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3331 
3332     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3333     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3334 
3335     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3336     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3337   }
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3342 {
3343   PetscErrorCode ierr;
3344   IS             iscol_local=NULL,isrow_d;
3345   PetscInt       csize;
3346   PetscInt       n,i,j,start,end;
3347   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3348   MPI_Comm       comm;
3349 
3350   PetscFunctionBegin;
3351   /* If isrow has same processor distribution as mat,
3352      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3353   if (call == MAT_REUSE_MATRIX) {
3354     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3355     if (isrow_d) {
3356       sameRowDist  = PETSC_TRUE;
3357       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3358     } else {
3359       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3360       if (iscol_local) {
3361         sameRowDist  = PETSC_TRUE;
3362         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3363       }
3364     }
3365   } else {
3366     /* Check if isrow has same processor distribution as mat */
3367     sameDist[0] = PETSC_FALSE;
3368     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3369     if (!n) {
3370       sameDist[0] = PETSC_TRUE;
3371     } else {
3372       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3373       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3374       if (i >= start && j < end) {
3375         sameDist[0] = PETSC_TRUE;
3376       }
3377     }
3378 
3379     /* Check if iscol has same processor distribution as mat */
3380     sameDist[1] = PETSC_FALSE;
3381     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3382     if (!n) {
3383       sameDist[1] = PETSC_TRUE;
3384     } else {
3385       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3386       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3387       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3388     }
3389 
3390     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3391     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3392     sameRowDist = tsameDist[0];
3393   }
3394 
3395   if (sameRowDist) {
3396     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3397       /* isrow and iscol have same processor distribution as mat */
3398       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3399       PetscFunctionReturn(0);
3400     } else { /* sameRowDist */
3401       /* isrow has same processor distribution as mat */
3402       if (call == MAT_INITIAL_MATRIX) {
3403         PetscBool sorted;
3404         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3405         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3406         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3407         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3408 
3409         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3410         if (sorted) {
3411           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3412           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3413           PetscFunctionReturn(0);
3414         }
3415       } else { /* call == MAT_REUSE_MATRIX */
3416         IS    iscol_sub;
3417         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3418         if (iscol_sub) {
3419           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3420           PetscFunctionReturn(0);
3421         }
3422       }
3423     }
3424   }
3425 
3426   /* General case: iscol -> iscol_local which has global size of iscol */
3427   if (call == MAT_REUSE_MATRIX) {
3428     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3429     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3430   } else {
3431     if (!iscol_local) {
3432       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3433     }
3434   }
3435 
3436   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3437   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3438 
3439   if (call == MAT_INITIAL_MATRIX) {
3440     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3441     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3442   }
3443   PetscFunctionReturn(0);
3444 }
3445 
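/*
   A minimal usage sketch (not part of this file) of the public MatCreateSubMatrix() interface, which
   dispatches to MatCreateSubMatrix_MPIAIJ() above; here isrow/iscol are assumed to select exactly the
   locally owned rows and columns, so the SameRowColDist fast path is taken:

     PetscInt rstart,rend,cstart,cend;
     IS       isrow,iscol;
     Mat      S;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/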
3446 /*@C
3447      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3448          and "off-diagonal" part of the matrix in CSR format.
3449 
3450    Collective on MPI_Comm
3451 
3452    Input Parameters:
3453 +  comm - MPI communicator
3454 .  A - "diagonal" portion of matrix
3455 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3456 -  garray - global index of B columns
3457 
3458    Output Parameter:
3459 .   mat - the matrix, with input A as its local diagonal matrix

3460    Level: advanced
3461 
3462    Notes:
3463        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3464        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3465 
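   Example Usage:
   A minimal sketch of how this routine is driven internally (see MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   above); Asub, Bsub and garray are assumed to come from a helper such as ISGetSeqIS_SameColDist_Private()
   followed by MatCreateSubMatrix_SeqAIJ() on the diagonal and off-diagonal blocks.
.vb
     Mat            Asub,Bsub,M;
     const PetscInt *garray;
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
     /* Asub is now owned by M and Bsub has been destroyed; neither may be used afterwards */
.ve
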
3466 .seealso: MatCreateMPIAIJWithSplitArrays()
3467 @*/
3468 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3469 {
3470   PetscErrorCode ierr;
3471   Mat_MPIAIJ     *maij;
3472   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3473   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3474   PetscScalar    *oa=b->a;
3475   Mat            Bnew;
3476   PetscInt       m,n,N;
3477 
3478   PetscFunctionBegin;
3479   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3480   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3481   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3482   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3483   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3484   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3485 
3486   /* Get global columns of mat */
3487   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3488 
3489   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3490   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3491   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3492   maij = (Mat_MPIAIJ*)(*mat)->data;
3493 
3494   (*mat)->preallocated = PETSC_TRUE;
3495 
3496   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3497   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3498 
3499   /* Set A as diagonal portion of *mat */
3500   maij->A = A;
3501 
3502   nz = oi[m];
3503   for (i=0; i<nz; i++) {
3504     col   = oj[i];
3505     oj[i] = garray[col];
3506   }
3507 
3508    /* Set Bnew as off-diagonal portion of *mat */
3509   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3510   bnew        = (Mat_SeqAIJ*)Bnew->data;
3511   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3512   maij->B     = Bnew;
3513 
3514   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3515 
3516   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3517   b->free_a       = PETSC_FALSE;
3518   b->free_ij      = PETSC_FALSE;
3519   ierr = MatDestroy(&B);CHKERRQ(ierr);
3520 
3521   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3522   bnew->free_a       = PETSC_TRUE;
3523   bnew->free_ij      = PETSC_TRUE;
3524 
3525   /* condense columns of maij->B */
3526   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3527   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3528   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3529   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3530   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3531   PetscFunctionReturn(0);
3532 }
3533 
3534 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3535 
3536 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3537 {
3538   PetscErrorCode ierr;
3539   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3540   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3541   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3542   Mat            M,Msub,B=a->B;
3543   MatScalar      *aa;
3544   Mat_SeqAIJ     *aij;
3545   PetscInt       *garray = a->garray,*colsub,Ncols;
3546   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3547   IS             iscol_sub,iscmap;
3548   const PetscInt *is_idx,*cmap;
3549   PetscBool      allcolumns=PETSC_FALSE;
3550   MPI_Comm       comm;
3551 
3552   PetscFunctionBegin;
3553   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3554 
3555   if (call == MAT_REUSE_MATRIX) {
3556     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3557     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3558     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3559 
3560     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3561     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3562 
3563     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3564     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3565 
3566     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3567 
3568   } else { /* call == MAT_INITIAL_MATRIX */
3569     PetscBool flg;
3570 
3571     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3572     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3573 
3574     /* (1) iscol -> nonscalable iscol_local */
3575     /* Check for special case: each processor gets entire matrix columns */
3576     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3577     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3578     if (allcolumns) {
3579       iscol_sub = iscol_local;
3580       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3581       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3582 
3583     } else {
3584       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3585       PetscInt *idx,*cmap1,k;
3586       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3587       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3588       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3589       count = 0;
3590       k     = 0;
3591       for (i=0; i<Ncols; i++) {
3592         j = is_idx[i];
3593         if (j >= cstart && j < cend) {
3594           /* diagonal part of mat */
3595           idx[count]     = j;
3596           cmap1[count++] = i; /* column index in submat */
3597         } else if (Bn) {
3598           /* off-diagonal part of mat */
3599           if (j == garray[k]) {
3600             idx[count]     = j;
3601             cmap1[count++] = i;  /* column index in submat */
3602           } else if (j > garray[k]) {
3603             while (j > garray[k] && k < Bn-1) k++;
3604             if (j == garray[k]) {
3605               idx[count]     = j;
3606               cmap1[count++] = i; /* column index in submat */
3607             }
3608           }
3609         }
3610       }
3611       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3612 
3613       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3614       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3615       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3616 
3617       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3618     }
3619 
3620     /* (3) Create sequential Msub */
3621     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3622   }
3623 
3624   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3625   aij  = (Mat_SeqAIJ*)(Msub)->data;
3626   ii   = aij->i;
3627   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3628 
3629   /*
3630       m - number of local rows
3631       Ncols - number of columns (same on all processors)
3632       rstart - first row in new global matrix generated
3633   */
3634   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3635 
3636   if (call == MAT_INITIAL_MATRIX) {
3637     /* (4) Create parallel newmat */
3638     PetscMPIInt    rank,size;
3639     PetscInt       csize;
3640 
3641     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3642     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3643 
3644     /*
3645         Determine the number of non-zeros in the diagonal and off-diagonal
3646         portions of the matrix in order to do correct preallocation
3647     */
3648 
3649     /* first get start and end of "diagonal" columns */
3650     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3651     if (csize == PETSC_DECIDE) {
3652       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3653       if (mglobal == Ncols) { /* square matrix */
3654         nlocal = m;
3655       } else {
3656         nlocal = Ncols/size + ((Ncols % size) > rank);
3657       }
3658     } else {
3659       nlocal = csize;
3660     }
3661     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3662     rstart = rend - nlocal;
3663     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3664 
3665     /* next, compute all the lengths */
3666     jj    = aij->j;
3667     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3668     olens = dlens + m;
3669     for (i=0; i<m; i++) {
3670       jend = ii[i+1] - ii[i];
3671       olen = 0;
3672       dlen = 0;
3673       for (j=0; j<jend; j++) {
3674         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3675         else dlen++;
3676         jj++;
3677       }
3678       olens[i] = olen;
3679       dlens[i] = dlen;
3680     }
3681 
3682     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3683     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3684 
3685     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3686     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3687     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3688     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3689     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3690     ierr = PetscFree(dlens);CHKERRQ(ierr);
3691 
3692   } else { /* call == MAT_REUSE_MATRIX */
3693     M    = *newmat;
3694     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3695     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3696     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3697     /*
3698          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3699        rather than the slower MatSetValues().
3700     */
3701     M->was_assembled = PETSC_TRUE;
3702     M->assembled     = PETSC_FALSE;
3703   }
3704 
3705   /* (5) Set values of Msub to *newmat */
3706   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3707   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3708 
3709   jj   = aij->j;
3710   aa   = aij->a;
3711   for (i=0; i<m; i++) {
3712     row = rstart + i;
3713     nz  = ii[i+1] - ii[i];
3714     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3715     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3716     jj += nz; aa += nz;
3717   }
3718   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3719 
3720   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3722 
3723   ierr = PetscFree(colsub);CHKERRQ(ierr);
3724 
3725   /* save Msub, iscol_sub and iscmap used in processor for next request */
3726   if (call ==  MAT_INITIAL_MATRIX) {
3727     *newmat = M;
3728     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3729     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3730 
3731     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3732     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3733 
3734     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3735     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3736 
3737     if (iscol_local) {
3738       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3739       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3740     }
3741   }
3742   PetscFunctionReturn(0);
3743 }
3744 
3745 /*
3746     Not great since it makes two copies of the submatrix: first a SeqAIJ
3747   locally, and then the final result by concatenating the local matrices.
3748   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3749 
3750   Note: This requires a sequential iscol with all indices.
3751 */
3752 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3753 {
3754   PetscErrorCode ierr;
3755   PetscMPIInt    rank,size;
3756   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3757   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3758   Mat            M,Mreuse;
3759   MatScalar      *aa,*vwork;
3760   MPI_Comm       comm;
3761   Mat_SeqAIJ     *aij;
3762   PetscBool      colflag,allcolumns=PETSC_FALSE;
3763 
3764   PetscFunctionBegin;
3765   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3766   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3767   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3768 
3769   /* Check for special case: each processor gets entire matrix columns */
3770   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3771   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3772   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3773 
3774   if (call ==  MAT_REUSE_MATRIX) {
3775     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3776     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3777     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3778   } else {
3779     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3780   }
3781 
3782   /*
3783       m - number of local rows
3784       n - number of columns (same on all processors)
3785       rstart - first row in new global matrix generated
3786   */
3787   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3788   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3789   if (call == MAT_INITIAL_MATRIX) {
3790     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3791     ii  = aij->i;
3792     jj  = aij->j;
3793 
3794     /*
3795         Determine the number of non-zeros in the diagonal and off-diagonal
3796         portions of the matrix in order to do correct preallocation
3797     */
3798 
3799     /* first get start and end of "diagonal" columns */
3800     if (csize == PETSC_DECIDE) {
3801       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3802       if (mglobal == n) { /* square matrix */
3803         nlocal = m;
3804       } else {
3805         nlocal = n/size + ((n % size) > rank);
3806       }
3807     } else {
3808       nlocal = csize;
3809     }
3810     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3811     rstart = rend - nlocal;
3812     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3813 
3814     /* next, compute all the lengths */
3815     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3816     olens = dlens + m;
3817     for (i=0; i<m; i++) {
3818       jend = ii[i+1] - ii[i];
3819       olen = 0;
3820       dlen = 0;
3821       for (j=0; j<jend; j++) {
3822         if (*jj < rstart || *jj >= rend) olen++;
3823         else dlen++;
3824         jj++;
3825       }
3826       olens[i] = olen;
3827       dlens[i] = dlen;
3828     }
3829     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3830     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3831     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3832     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3833     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3834     ierr = PetscFree(dlens);CHKERRQ(ierr);
3835   } else {
3836     PetscInt ml,nl;
3837 
3838     M    = *newmat;
3839     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3840     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3841     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3842     /*
3843          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3844        rather than the slower MatSetValues().
3845     */
3846     M->was_assembled = PETSC_TRUE;
3847     M->assembled     = PETSC_FALSE;
3848   }
3849   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3850   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3851   ii   = aij->i;
3852   jj   = aij->j;
3853   aa   = aij->a;
3854   for (i=0; i<m; i++) {
3855     row   = rstart + i;
3856     nz    = ii[i+1] - ii[i];
3857     cwork = jj;     jj += nz;
3858     vwork = aa;     aa += nz;
3859     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3860   }
3861 
3862   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3863   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864   *newmat = M;
3865 
3866   /* save submatrix used in processor for next request */
3867   if (call ==  MAT_INITIAL_MATRIX) {
3868     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3869     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3870   }
3871   PetscFunctionReturn(0);
3872 }
3873 
3874 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3875 {
3876   PetscInt       m,cstart, cend,j,nnz,i,d;
3877   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3878   const PetscInt *JJ;
3879   PetscScalar    *values;
3880   PetscErrorCode ierr;
3881   PetscBool      nooffprocentries;
3882 
3883   PetscFunctionBegin;
3884   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3885 
3886   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3887   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3888   m      = B->rmap->n;
3889   cstart = B->cmap->rstart;
3890   cend   = B->cmap->rend;
3891   rstart = B->rmap->rstart;
3892 
3893   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3894 
3895 #if defined(PETSC_USE_DEBUG)
3896   for (i=0; i<m; i++) {
3897     nnz = Ii[i+1]- Ii[i];
3898     JJ  = J + Ii[i];
3899     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3900     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3901     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3902   }
3903 #endif
3904 
3905   for (i=0; i<m; i++) {
3906     nnz     = Ii[i+1]- Ii[i];
3907     JJ      = J + Ii[i];
3908     nnz_max = PetscMax(nnz_max,nnz);
3909     d       = 0;
3910     for (j=0; j<nnz; j++) {
3911       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3912     }
3913     d_nnz[i] = d;
3914     o_nnz[i] = nnz - d;
3915   }
3916   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3917   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3918 
3919   if (v) values = (PetscScalar*)v;
3920   else {
3921     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3922   }
3923 
3924   for (i=0; i<m; i++) {
3925     ii   = i + rstart;
3926     nnz  = Ii[i+1]- Ii[i];
3927     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3928   }
3929   nooffprocentries    = B->nooffprocentries;
3930   B->nooffprocentries = PETSC_TRUE;
3931   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   B->nooffprocentries = nooffprocentries;
3934 
3935   if (!v) {
3936     ierr = PetscFree(values);CHKERRQ(ierr);
3937   }
3938   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3939   PetscFunctionReturn(0);
3940 }
3941 
3942 /*@
3943    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3944    (the default parallel PETSc format).
3945 
3946    Collective on MPI_Comm
3947 
3948    Input Parameters:
3949 +  B - the matrix
3950 .  i - the indices into j for the start of each local row (starts with zero)
3951 .  j - the column indices for each local row (starts with zero)
3952 -  v - optional values in the matrix
3953 
3954    Level: developer
3955 
3956    Notes:
3957        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3958      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3959      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3960 
3961        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3962 
3963        The format used for the sparse matrix input is equivalent to a
3964     row-major ordering, i.e. for the following matrix, the input data expected is
3965     as shown:
3966 
3967 $        1 0 0
3968 $        2 0 3     P0
3969 $       -------
3970 $        4 5 6     P1
3971 $
3972 $     Process0 [P0]: rows_owned=[0,1]
3973 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3974 $        j =  {0,0,2}  [size = 3]
3975 $        v =  {1,2,3}  [size = 3]
3976 $
3977 $     Process1 [P1]: rows_owned=[2]
3978 $        i =  {0,3}    [size = nrow+1  = 1+1]
3979 $        j =  {0,1,2}  [size = 3]
3980 $        v =  {4,5,6}  [size = 3]
3981 
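   A minimal usage sketch for the example above (illustrative names; i, j and v are the
   per-process arrays shown, nlocalrows is 2 on P0 and 1 on P1, error checking omitted):

.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,nlocalrows,PETSC_DECIDE,3,3);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
   On return B is assembled and ready for use.
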
3982 .keywords: matrix, aij, compressed row, sparse, parallel
3983 
3984 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3985           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3986 @*/
3987 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3988 {
3989   PetscErrorCode ierr;
3990 
3991   PetscFunctionBegin;
3992   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3993   PetscFunctionReturn(0);
3994 }
3995 
3996 /*@C
3997    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3998    (the default parallel PETSc format).  For good matrix assembly performance
3999    the user should preallocate the matrix storage by setting the parameters
4000    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4001    performance can be increased by more than a factor of 50.
4002 
4003    Collective on MPI_Comm
4004 
4005    Input Parameters:
4006 +  B - the matrix
4007 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4008            (same value is used for all local rows)
4009 .  d_nnz - array containing the number of nonzeros in the various rows of the
4010            DIAGONAL portion of the local submatrix (possibly different for each row)
4011            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4012            The size of this array is equal to the number of local rows, i.e 'm'.
4013            For matrices that will be factored, you must leave room for (and set)
4014            the diagonal entry even if it is zero.
4015 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4016            submatrix (same value is used for all local rows).
4017 -  o_nnz - array containing the number of nonzeros in the various rows of the
4018            OFF-DIAGONAL portion of the local submatrix (possibly different for
4019            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4020            structure. The size of this array is equal to the number
4021            of local rows, i.e 'm'.
4022 
4023    If the *_nnz parameter is given then the *_nz parameter is ignored
4024 
4025    The AIJ format (also called the Yale sparse matrix format or
4026    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4027    storage.  The stored row and column indices begin with zero.
4028    See Users-Manual: ch_mat for details.
4029 
4030    The parallel matrix is partitioned such that the first m0 rows belong to
4031    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4032    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4033 
4034    The DIAGONAL portion of the local submatrix of a processor can be defined
4035    as the submatrix which is obtained by extracting the part corresponding to
4036    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4037    first row that belongs to the processor, r2 is the last row belonging to
4038    this processor, and c1-c2 is the range of indices of the local part of a
4039    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4040    common case of a square matrix, the row and column ranges are the same and
4041    the DIAGONAL part is also square. The remaining portion of the local
4042    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4043 
4044    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4045 
4046    You can call MatGetInfo() to get information on how effective the preallocation was;
4047    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4048    You can also run with the option -info and look for messages with the string
4049    malloc in them to see if additional memory allocation was needed.
4050 
4051    Example usage:
4052 
4053    Consider the following 8x8 matrix with 34 non-zero values, that is
4054    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4055    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4056    as follows:
4057 
4058 .vb
4059             1  2  0  |  0  3  0  |  0  4
4060     Proc0   0  5  6  |  7  0  0  |  8  0
4061             9  0 10  | 11  0  0  | 12  0
4062     -------------------------------------
4063            13  0 14  | 15 16 17  |  0  0
4064     Proc1   0 18  0  | 19 20 21  |  0  0
4065             0  0  0  | 22 23  0  | 24  0
4066     -------------------------------------
4067     Proc2  25 26 27  |  0  0 28  | 29  0
4068            30  0  0  | 31 32 33  |  0 34
4069 .ve
4070 
4071    This can be represented as a collection of submatrices as:
4072 
4073 .vb
4074       A B C
4075       D E F
4076       G H I
4077 .ve
4078 
4079    Where the submatrices A,B,C are owned by proc0, D,E,F are
4080    owned by proc1, G,H,I are owned by proc2.
4081 
4082    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4083    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4084    The 'M','N' parameters are 8,8, and have the same values on all procs.
4085 
4086    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4087    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4088    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4089    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4090    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4091    matrix, and [DF] as another SeqAIJ matrix.
4092 
4093    When d_nz, o_nz parameters are specified, d_nz storage elements are
4094    allocated for every row of the local diagonal submatrix, and o_nz
4095    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4096    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4097    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4098    In this case, the values of d_nz,o_nz are:
4099 .vb
4100      proc0 : dnz = 2, o_nz = 2
4101      proc1 : dnz = 3, o_nz = 2
4102      proc2 : dnz = 1, o_nz = 4
4103 .ve
4104    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4105    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4106    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4107    34 values.
4108 
4109    When d_nnz, o_nnz parameters are specified, the storage is specified
4110    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4111    In the above case the values for d_nnz,o_nnz are:
4112 .vb
4113      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4114      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4115      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4116 .ve
4117    Here the space allocated is sum of all the above values i.e 34, and
4118    hence pre-allocation is perfect.
4119 
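   As a sketch, the per-row preallocation above would be set on proc1 as follows
   (illustrative names; the matrix type and sizes are assumed to have been set already):

.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   Since the arrays are given, the d_nz and o_nz arguments (here 0) are ignored.
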
4120    Level: intermediate
4121 
4122 .keywords: matrix, aij, compressed row, sparse, parallel
4123 
4124 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4125           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4126 @*/
4127 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4128 {
4129   PetscErrorCode ierr;
4130 
4131   PetscFunctionBegin;
4132   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4133   PetscValidType(B,1);
4134   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4135   PetscFunctionReturn(0);
4136 }
4137 
4138 /*@
4139      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4140          CSR format the local rows.
4141 
4142    Collective on MPI_Comm
4143 
4144    Input Parameters:
4145 +  comm - MPI communicator
4146 .  m - number of local rows (Cannot be PETSC_DECIDE)
4147 .  n - This value should be the same as the local size used in creating the
4148        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4149        calculated if N is given). For square matrices n is almost always m.
4150 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4151 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4152 .   i - row indices
4153 .   j - column indices
4154 -   a - matrix values
4155 
4156    Output Parameter:
4157 .   mat - the matrix
4158 
4159    Level: intermediate
4160 
4161    Notes:
4162        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4163      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4164      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4165 
4166        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4167 
4168        The format used for the sparse matrix input is equivalent to a
4169     row-major ordering, i.e. for the following matrix, the input data expected is
4170     as shown:
4171 
4172 $        1 0 0
4173 $        2 0 3     P0
4174 $       -------
4175 $        4 5 6     P1
4176 $
4177 $     Process0 [P0]: rows_owned=[0,1]
4178 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4179 $        j =  {0,0,2}  [size = 3]
4180 $        v =  {1,2,3}  [size = 3]
4181 $
4182 $     Process1 [P1]: rows_owned=[2]
4183 $        i =  {0,3}    [size = nrow+1  = 1+1]
4184 $        j =  {0,1,2}  [size = 3]
4185 $        v =  {4,5,6}  [size = 3]
4186 
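   A minimal calling sketch for the example above (illustrative names; each process passes
   only the i, j and v arrays for its own rows, so m is 2 on P0 and 1 on P1):

.vb
      Mat A;
      MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
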
4187 .keywords: matrix, aij, compressed row, sparse, parallel
4188 
4189 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4190           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4191 @*/
4192 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4193 {
4194   PetscErrorCode ierr;
4195 
4196   PetscFunctionBegin;
4197   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4198   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4199   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4200   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4201   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4202   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4203   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4204   PetscFunctionReturn(0);
4205 }
4206 
4207 /*@C
4208    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4209    (the default parallel PETSc format).  For good matrix assembly performance
4210    the user should preallocate the matrix storage by setting the parameters
4211    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4212    performance can be increased by more than a factor of 50.
4213 
4214    Collective on MPI_Comm
4215 
4216    Input Parameters:
4217 +  comm - MPI communicator
4218 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4219            This value should be the same as the local size used in creating the
4220            y vector for the matrix-vector product y = Ax.
4221 .  n - This value should be the same as the local size used in creating the
4222        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4223        calculated if N is given). For square matrices n is almost always m.
4224 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4225 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4226 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4227            (same value is used for all local rows)
4228 .  d_nnz - array containing the number of nonzeros in the various rows of the
4229            DIAGONAL portion of the local submatrix (possibly different for each row)
4230            or NULL, if d_nz is used to specify the nonzero structure.
4231            The size of this array is equal to the number of local rows, i.e 'm'.
4232 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4233            submatrix (same value is used for all local rows).
4234 -  o_nnz - array containing the number of nonzeros in the various rows of the
4235            OFF-DIAGONAL portion of the local submatrix (possibly different for
4236            each row) or NULL, if o_nz is used to specify the nonzero
4237            structure. The size of this array is equal to the number
4238            of local rows, i.e 'm'.
4239 
4240    Output Parameter:
4241 .  A - the matrix
4242 
4243    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4244    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4245    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4246 
4247    Notes:
4248    If the *_nnz parameter is given then the *_nz parameter is ignored
4249 
4250    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4251    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4252    storage requirements for this matrix.
4253 
4254    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4255    processor then it must be used on all processors that share the object for
4256    that argument.
4257 
4258    The user MUST specify either the local or global matrix dimensions
4259    (possibly both).
4260 
4261    The parallel matrix is partitioned across processors such that the
4262    first m0 rows belong to process 0, the next m1 rows belong to
4263    process 1, the next m2 rows belong to process 2, etc., where
4264    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4265    values corresponding to an [m x N] submatrix.
4266 
4267    The columns are logically partitioned with the n0 columns belonging
4268    to 0th partition, the next n1 columns belonging to the next
4269    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4270 
4271    The DIAGONAL portion of the local submatrix on any given processor
4272    is the submatrix corresponding to the rows and columns m,n
4273    corresponding to the given processor, i.e. the diagonal matrix on
4274    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4275    etc. The remaining portion of the local submatrix [m x (N-n)]
4276    constitute the OFF-DIAGONAL portion. The example below better
4277    illustrates this concept.
4278 
4279    For a square global matrix we define each processor's diagonal portion
4280    to be its local rows and the corresponding columns (a square submatrix);
4281    each processor's off-diagonal portion encompasses the remainder of the
4282    local matrix (a rectangular submatrix).
4283 
4284    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4285 
4286    When calling this routine with a single process communicator, a matrix of
4287    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4288    type of communicator, use the construction mechanism
4289 .vb
4290      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4291 .ve
4292 
4298    By default, this format uses inodes (identical nodes) when possible.
4299    We search for consecutive rows with the same nonzero structure, thereby
4300    reusing matrix information to achieve increased efficiency.
4301 
4302    Options Database Keys:
4303 +  -mat_no_inode  - Do not use inodes
4304 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4305 
4306 
4307 
4308    Example usage:
4309 
4310    Consider the following 8x8 matrix with 34 non-zero values, that is
4311    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4312    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4313    as follows
4314 
4315 .vb
4316             1  2  0  |  0  3  0  |  0  4
4317     Proc0   0  5  6  |  7  0  0  |  8  0
4318             9  0 10  | 11  0  0  | 12  0
4319     -------------------------------------
4320            13  0 14  | 15 16 17  |  0  0
4321     Proc1   0 18  0  | 19 20 21  |  0  0
4322             0  0  0  | 22 23  0  | 24  0
4323     -------------------------------------
4324     Proc2  25 26 27  |  0  0 28  | 29  0
4325            30  0  0  | 31 32 33  |  0 34
4326 .ve
4327 
4328    This can be represented as a collection of submatrices as
4329 
4330 .vb
4331       A B C
4332       D E F
4333       G H I
4334 .ve
4335 
4336    Where the submatrices A,B,C are owned by proc0, D,E,F are
4337    owned by proc1, G,H,I are owned by proc2.
4338 
4339    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4340    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4341    The 'M','N' parameters are 8,8, and have the same values on all procs.
4342 
4343    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4344    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4345    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4346    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4347    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4348    matrix, and [DF] as another SeqAIJ matrix.
4349 
4350    When d_nz, o_nz parameters are specified, d_nz storage elements are
4351    allocated for every row of the local diagonal submatrix, and o_nz
4352    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4353    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4354    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4355    In this case, the values of d_nz,o_nz are
4356 .vb
4357      proc0 : dnz = 2, o_nz = 2
4358      proc1 : dnz = 3, o_nz = 2
4359      proc2 : dnz = 1, o_nz = 4
4360 .ve
4361    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4362    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4363    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4364    34 values.
4365 
4366    When d_nnz, o_nnz parameters are specified, the storage is specified
4367    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4368    In the above case the values for d_nnz,o_nnz are
4369 .vb
4370      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4371      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4372      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4373 .ve
4374    Here the space allocated is sum of all the above values i.e 34, and
4375    hence pre-allocation is perfect.
4376 
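   As a sketch, proc2 in the example above could create its share of the matrix with
   per-row preallocation as follows (illustrative names; every process makes the same
   call with its own local sizes and arrays):

.vb
      PetscInt d_nnz[2] = {1,1}, o_nnz[2] = {4,4};
      Mat      A;
      MatCreateAIJ(comm,2,2,8,8,0,d_nnz,0,o_nnz,&A);
.ve
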
4377    Level: intermediate
4378 
4379 .keywords: matrix, aij, compressed row, sparse, parallel
4380 
4381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4382           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4383 @*/
4384 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4385 {
4386   PetscErrorCode ierr;
4387   PetscMPIInt    size;
4388 
4389   PetscFunctionBegin;
4390   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4391   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4392   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4393   if (size > 1) {
4394     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4395     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4396   } else {
4397     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4398     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4399   }
4400   PetscFunctionReturn(0);
4401 }
4402 
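/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad, the off-diagonal block Ao, and the
   mapping colmap from the off-diagonal block's local column numbers to global column numbers
   of a MATMPIAIJ matrix. Any of the output arguments may be NULL if that value is not needed.
*/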
4403 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4404 {
4405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4406   PetscBool      flg;
4407   PetscErrorCode ierr;
4408 
4409   PetscFunctionBegin;
4410   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4411   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4412   if (Ad)     *Ad     = a->A;
4413   if (Ao)     *Ao     = a->B;
4414   if (colmap) *colmap = a->garray;
4415   PetscFunctionReturn(0);
4416 }
4417 
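/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the sequential matrix inmat from
   each process into a parallel AIJ matrix outmat with local column size n (or PETSC_DECIDE).
   With MAT_INITIAL_MATRIX the layout and preallocation are determined (symbolic phase); with
   MAT_REUSE_MATRIX only the numerical values are re-inserted (numeric phase).
*/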
4418 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4419 {
4420   PetscErrorCode ierr;
4421   PetscInt       m,N,i,rstart,nnz,Ii;
4422   PetscInt       *indx;
4423   PetscScalar    *values;
4424 
4425   PetscFunctionBegin;
4426   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4427   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4428     PetscInt       *dnz,*onz,sum,bs,cbs;
4429 
4430     if (n == PETSC_DECIDE) {
4431       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4432     }
4433     /* Check sum(n) = N */
4434     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4435     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4436 
4437     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4438     rstart -= m;
4439 
4440     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4441     for (i=0; i<m; i++) {
4442       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4443       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4444       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4445     }
4446 
4447     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4448     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4449     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4450     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4451     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4452     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4453     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4454     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4455   }
4456 
4457   /* numeric phase */
4458   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4459   for (i=0; i<m; i++) {
4460     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4461     Ii   = i + rstart;
4462     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4463     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4464   }
4465   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4466   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4467   PetscFunctionReturn(0);
4468 }
4469 
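/*
   MatFileSplit - writes the local rows of A on each process, as a sequential matrix of size
   (local rows) x (global columns), to the binary file <outfile>.<rank>.
*/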
4470 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4471 {
4472   PetscErrorCode    ierr;
4473   PetscMPIInt       rank;
4474   PetscInt          m,N,i,rstart,nnz;
4475   size_t            len;
4476   const PetscInt    *indx;
4477   PetscViewer       out;
4478   char              *name;
4479   Mat               B;
4480   const PetscScalar *values;
4481 
4482   PetscFunctionBegin;
4483   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4484   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4485   /* Should this be the type of the diagonal block of A? */
4486   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4487   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4488   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4489   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4490   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4491   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4492   for (i=0; i<m; i++) {
4493     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4494     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4495     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4496   }
4497   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4498   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4499 
4500   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4501   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4502   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4503   sprintf(name,"%s.%d",outfile,rank);
4504   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4505   ierr = PetscFree(name);CHKERRQ(ierr);
4506   ierr = MatView(B,out);CHKERRQ(ierr);
4507   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4508   ierr = MatDestroy(&B);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4513 {
4514   PetscErrorCode      ierr;
4515   Mat_Merge_SeqsToMPI *merge;
4516   PetscContainer      container;
4517 
4518   PetscFunctionBegin;
4519   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4520   if (container) {
4521     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4522     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4534     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4535     ierr = PetscFree(merge);CHKERRQ(ierr);
4536     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4537   }
4538   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4539   PetscFunctionReturn(0);
4540 }
4541 
4542 #include <../src/mat/utils/freespace.h>
4543 #include <petscbt.h>
4544 
4545 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4546 {
4547   PetscErrorCode      ierr;
4548   MPI_Comm            comm;
4549   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4550   PetscMPIInt         size,rank,taga,*len_s;
4551   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4552   PetscInt            proc,m;
4553   PetscInt            **buf_ri,**buf_rj;
4554   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4555   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4556   MPI_Request         *s_waits,*r_waits;
4557   MPI_Status          *status;
4558   MatScalar           *aa=a->a;
4559   MatScalar           **abuf_r,*ba_i;
4560   Mat_Merge_SeqsToMPI *merge;
4561   PetscContainer      container;
4562 
4563   PetscFunctionBegin;
4564   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4565   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4566 
4567   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4568   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4569 
4570   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4571   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4572 
4573   bi     = merge->bi;
4574   bj     = merge->bj;
4575   buf_ri = merge->buf_ri;
4576   buf_rj = merge->buf_rj;
4577 
4578   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4579   owners = merge->rowmap->range;
4580   len_s  = merge->len_s;
4581 
4582   /* send and recv matrix values */
4583   /*-----------------------------*/
4584   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4585   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4586 
4587   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4588   for (proc=0,k=0; proc<size; proc++) {
4589     if (!len_s[proc]) continue;
4590     i    = owners[proc];
4591     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4592     k++;
4593   }
4594 
4595   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4596   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4597   ierr = PetscFree(status);CHKERRQ(ierr);
4598 
4599   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4600   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4601 
4602   /* insert mat values of mpimat */
4603   /*----------------------------*/
4604   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4605   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4606 
4607   for (k=0; k<merge->nrecv; k++) {
4608     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4609     nrows       = *(buf_ri_k[k]);
4610     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4611     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4612   }
4613 
4614   /* set values of ba */
4615   m = merge->rowmap->n;
4616   for (i=0; i<m; i++) {
4617     arow = owners[rank] + i;
4618     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4619     bnzi = bi[i+1] - bi[i];
4620     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4621 
4622     /* add local non-zero vals of this proc's seqmat into ba */
4623     anzi   = ai[arow+1] - ai[arow];
4624     aj     = a->j + ai[arow];
4625     aa     = a->a + ai[arow];
4626     nextaj = 0;
4627     for (j=0; nextaj<anzi; j++) {
4628       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4629         ba_i[j] += aa[nextaj++];
4630       }
4631     }
4632 
4633     /* add received vals into ba */
4634     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4635       /* i-th row */
4636       if (i == *nextrow[k]) {
4637         anzi   = *(nextai[k]+1) - *nextai[k];
4638         aj     = buf_rj[k] + *(nextai[k]);
4639         aa     = abuf_r[k] + *(nextai[k]);
4640         nextaj = 0;
4641         for (j=0; nextaj<anzi; j++) {
4642           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4643             ba_i[j] += aa[nextaj++];
4644           }
4645         }
4646         nextrow[k]++; nextai[k]++;
4647       }
4648     }
4649     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4650   }
4651   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4652   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4653 
4654   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4655   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4656   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4657   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4658   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4659   PetscFunctionReturn(0);
4660 }
4661 
4662 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4663 {
4664   PetscErrorCode      ierr;
4665   Mat                 B_mpi;
4666   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4667   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4668   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4669   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4670   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4671   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4672   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4673   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4674   MPI_Status          *status;
4675   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4676   PetscBT             lnkbt;
4677   Mat_Merge_SeqsToMPI *merge;
4678   PetscContainer      container;
4679 
4680   PetscFunctionBegin;
4681   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4682 
4683   /* make sure it is a PETSc comm */
4684   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4686   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4687 
4688   ierr = PetscNew(&merge);CHKERRQ(ierr);
4689   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4690 
4691   /* determine row ownership */
4692   /*---------------------------------------------------------*/
4693   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4694   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4695   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4696   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4697   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4698   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4699   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4700 
4701   m      = merge->rowmap->n;
4702   owners = merge->rowmap->range;
4703 
4704   /* determine the number of messages to send, their lengths */
4705   /*---------------------------------------------------------*/
4706   len_s = merge->len_s;
4707 
4708   len          = 0; /* length of buf_si[] */
4709   merge->nsend = 0;
4710   for (proc=0; proc<size; proc++) {
4711     len_si[proc] = 0;
4712     if (proc == rank) {
4713       len_s[proc] = 0;
4714     } else {
4715       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4716       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4717     }
4718     if (len_s[proc]) {
4719       merge->nsend++;
4720       nrows = 0;
4721       for (i=owners[proc]; i<owners[proc+1]; i++) {
4722         if (ai[i+1] > ai[i]) nrows++;
4723       }
4724       len_si[proc] = 2*(nrows+1);
4725       len         += len_si[proc];
4726     }
4727   }
4728 
4729   /* determine the number and length of messages to receive for ij-structure */
4730   /*-------------------------------------------------------------------------*/
4731   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4732   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4733 
4734   /* post the Irecv of j-structure */
4735   /*-------------------------------*/
4736   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4737   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4738 
4739   /* post the Isend of j-structure */
4740   /*--------------------------------*/
4741   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4742 
4743   for (proc=0, k=0; proc<size; proc++) {
4744     if (!len_s[proc]) continue;
4745     i    = owners[proc];
4746     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4747     k++;
4748   }
4749 
4750   /* receives and sends of j-structure are complete */
4751   /*------------------------------------------------*/
4752   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4753   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4754 
4755   /* send and recv i-structure */
4756   /*---------------------------*/
4757   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4758   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4759 
4760   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4761   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4762   for (proc=0,k=0; proc<size; proc++) {
4763     if (!len_s[proc]) continue;
4764     /* form outgoing message for i-structure:
4765          buf_si[0]:                 nrows to be sent
4766                [1:nrows]:           row index (global)
4767                [nrows+1:2*nrows+1]: i-structure index
4768     */
4769     /*-------------------------------------------*/
4770     nrows       = len_si[proc]/2 - 1;
4771     buf_si_i    = buf_si + nrows+1;
4772     buf_si[0]   = nrows;
4773     buf_si_i[0] = 0;
4774     nrows       = 0;
4775     for (i=owners[proc]; i<owners[proc+1]; i++) {
4776       anzi = ai[i+1] - ai[i];
4777       if (anzi) {
4778         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4779         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4780         nrows++;
4781       }
4782     }
4783     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4784     k++;
4785     buf_si += len_si[proc];
4786   }
4787 
4788   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4789   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4790 
4791   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4792   for (i=0; i<merge->nrecv; i++) {
4793     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4794   }
4795 
4796   ierr = PetscFree(len_si);CHKERRQ(ierr);
4797   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4798   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4799   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4800   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4801   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4802   ierr = PetscFree(status);CHKERRQ(ierr);
4803 
4804   /* compute a local seq matrix in each processor */
4805   /*----------------------------------------------*/
4806   /* allocate bi array and free space for accumulating nonzero column info */
4807   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4808   bi[0] = 0;
4809 
4810   /* create and initialize a linked list */
4811   nlnk = N+1;
4812   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813 
4814   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4815   len  = ai[owners[rank+1]] - ai[owners[rank]];
4816   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4817 
4818   current_space = free_space;
4819 
4820   /* determine symbolic info for each local row */
4821   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4822 
4823   for (k=0; k<merge->nrecv; k++) {
4824     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4825     nrows       = *buf_ri_k[k];
4826     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4827     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4828   }
4829 
4830   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4831   len  = 0;
4832   for (i=0; i<m; i++) {
4833     bnzi = 0;
4834     /* add local non-zero cols of this proc's seqmat into lnk */
4835     arow  = owners[rank] + i;
4836     anzi  = ai[arow+1] - ai[arow];
4837     aj    = a->j + ai[arow];
4838     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4839     bnzi += nlnk;
4840     /* add received col data into lnk */
4841     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4842       if (i == *nextrow[k]) { /* i-th row */
4843         anzi  = *(nextai[k]+1) - *nextai[k];
4844         aj    = buf_rj[k] + *nextai[k];
4845         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4846         bnzi += nlnk;
4847         nextrow[k]++; nextai[k]++;
4848       }
4849     }
4850     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4851 
4852     /* if free space is not available, make more free space */
4853     if (current_space->local_remaining<bnzi) {
4854       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4855       nspacedouble++;
4856     }
4857     /* copy data into free space, then initialize lnk */
4858     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4859     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4860 
4861     current_space->array           += bnzi;
4862     current_space->local_used      += bnzi;
4863     current_space->local_remaining -= bnzi;
4864 
4865     bi[i+1] = bi[i] + bnzi;
4866   }
4867 
4868   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4869 
4870   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4871   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4872   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4873 
4874   /* create symbolic parallel matrix B_mpi */
4875   /*---------------------------------------*/
4876   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4877   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4878   if (n==PETSC_DECIDE) {
4879     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4880   } else {
4881     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4882   }
4883   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4884   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4885   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4886   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4887   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4888 
4889   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4890   B_mpi->assembled    = PETSC_FALSE;
4891   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4892   merge->bi           = bi;
4893   merge->bj           = bj;
4894   merge->buf_ri       = buf_ri;
4895   merge->buf_rj       = buf_rj;
4896   merge->coi          = NULL;
4897   merge->coj          = NULL;
4898   merge->owners_co    = NULL;
4899 
4900   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4901 
4902   /* attach the supporting struct to B_mpi for reuse */
4903   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4904   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4905   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4906   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4907   *mpimat = B_mpi;
4908 
4909   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4910   PetscFunctionReturn(0);
4911 }
4912 
4913 /*@C
4914       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4915                  matrices from each processor
4916 
4917     Collective on MPI_Comm
4918 
4919    Input Parameters:
4920 +    comm - the communicator the parallel matrix will live on
4921 .    seqmat - the input sequential matrix (one per process)
4922 .    m - number of local rows (or PETSC_DECIDE)
4923 .    n - number of local columns (or PETSC_DECIDE)
4924 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4925 
4926    Output Parameter:
4927 .    mpimat - the parallel matrix generated
4928 
4929     Level: advanced
4930 
4931    Notes:
4932      The dimensions of the sequential matrix in each processor MUST be the same.
4933      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4934      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
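
     A typical calling sequence is sketched below (illustrative names); the first call builds
     the parallel matrix, the second reuses its structure after the numerical values of seqmat
     have changed:

.vb
      Mat C;
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve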
4935 @*/
4936 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4937 {
4938   PetscErrorCode ierr;
4939   PetscMPIInt    size;
4940 
4941   PetscFunctionBegin;
4942   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4943   if (size == 1) {
4944     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4945     if (scall == MAT_INITIAL_MATRIX) {
4946       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4947     } else {
4948       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4949     }
4950     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4951     PetscFunctionReturn(0);
4952   }
4953   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4954   if (scall == MAT_INITIAL_MATRIX) {
4955     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4956   }
4957   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4958   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 /*@
4963      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4964           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4965           with MatGetSize().
4966 
4967     Not Collective
4968 
4969    Input Parameters:
4970 +    A - the matrix
4971 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4972 
4973    Output Parameter:
4974 .    A_loc - the local sequential matrix generated
4975 
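     A minimal usage sketch (illustrative names); the local matrix belongs to the caller and
     can be destroyed once it is no longer needed:

.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      MatDestroy(&A_loc);
.ve
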
4976     Level: developer
4977 
4978 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4979 
4980 @*/
4981 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4982 {
4983   PetscErrorCode ierr;
4984   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4985   Mat_SeqAIJ     *mat,*a,*b;
4986   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4987   MatScalar      *aa,*ba,*cam;
4988   PetscScalar    *ca;
4989   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4990   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4991   PetscBool      match;
4992   MPI_Comm       comm;
4993   PetscMPIInt    size;
4994 
4995   PetscFunctionBegin;
4996   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4997   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4998   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5000   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5001 
5002   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5003   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5004   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5005   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5006   aa = a->a; ba = b->a;
5007   if (scall == MAT_INITIAL_MATRIX) {
5008     if (size == 1) {
5009       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5010       PetscFunctionReturn(0);
5011     }
5012 
5013     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5014     ci[0] = 0;
5015     for (i=0; i<am; i++) {
5016       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5017     }
5018     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5019     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5020     k    = 0;
5021     for (i=0; i<am; i++) {
5022       ncols_o = bi[i+1] - bi[i];
5023       ncols_d = ai[i+1] - ai[i];
5024       /* off-diagonal portion of A */
5025       for (jo=0; jo<ncols_o; jo++) {
5026         col = cmap[*bj];
5027         if (col >= cstart) break;
5028         cj[k]   = col; bj++;
5029         ca[k++] = *ba++;
5030       }
5031       /* diagonal portion of A */
5032       for (j=0; j<ncols_d; j++) {
5033         cj[k]   = cstart + *aj++;
5034         ca[k++] = *aa++;
5035       }
5036       /* off-diagonal portion of A */
5037       for (j=jo; j<ncols_o; j++) {
5038         cj[k]   = cmap[*bj++];
5039         ca[k++] = *ba++;
5040       }
5041     }
5042     /* put together the new matrix */
5043     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5044     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5045     /* Since these are PETSc arrays, change flags to free them as necessary. */
5046     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5047     mat->free_a  = PETSC_TRUE;
5048     mat->free_ij = PETSC_TRUE;
5049     mat->nonew   = 0;
5050   } else if (scall == MAT_REUSE_MATRIX) {
5051     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5052     ci = mat->i; cj = mat->j; cam = mat->a;
5053     for (i=0; i<am; i++) {
5054       /* off-diagonal portion of A */
5055       ncols_o = bi[i+1] - bi[i];
5056       for (jo=0; jo<ncols_o; jo++) {
5057         col = cmap[*bj];
5058         if (col >= cstart) break;
5059         *cam++ = *ba++; bj++;
5060       }
5061       /* diagonal portion of A */
5062       ncols_d = ai[i+1] - ai[i];
5063       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5064       /* off-diagonal portion of A (global columns right of the diagonal block) */
5065       for (j=jo; j<ncols_o; j++) {
5066         *cam++ = *ba++; bj++;
5067       }
5068     }
5069   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5070   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5071   PetscFunctionReturn(0);
5072 }
5073 
5074 /*@C
5075      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and only the columns that contain nonzeros
5076 
5077     Not Collective
5078 
5079    Input Parameters:
5080 +    A - the matrix
5081 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5082 -    row, col - index sets of rows and columns to extract (or NULL)
5083 
5084    Output Parameter:
5085 .    A_loc - the local sequential matrix generated
5086 
5087     Level: developer
5088 
5089 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5090 
5091 @*/
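/*
   Example usage (an illustrative sketch, not code from this file): pass NULL for the row
   and column index sets and let the routine pick the local rows and the nonzero columns.

      Mat Aloc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
      ... work with the condensed sequential matrix Aloc ...
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/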
5092 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5093 {
5094   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5095   PetscErrorCode ierr;
5096   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5097   IS             isrowa,iscola;
5098   Mat            *aloc;
5099   PetscBool      match;
5100 
5101   PetscFunctionBegin;
5102   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5103   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5104   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5105   if (!row) {
5106     start = A->rmap->rstart; end = A->rmap->rend;
5107     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5108   } else {
5109     isrowa = *row;
5110   }
5111   if (!col) {
5112     start = A->cmap->rstart;
5113     cmap  = a->garray;
5114     nzA   = a->A->cmap->n;
5115     nzB   = a->B->cmap->n;
5116     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5117     ncols = 0;
5118     for (i=0; i<nzB; i++) {
5119       if (cmap[i] < start) idx[ncols++] = cmap[i];
5120       else break;
5121     }
5122     imark = i;
5123     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5124     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5125     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5126   } else {
5127     iscola = *col;
5128   }
5129   if (scall != MAT_INITIAL_MATRIX) {
5130     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5131     aloc[0] = *A_loc;
5132   }
5133   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5134   if (!col) { /* attach global id of condensed columns */
5135     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5136   }
5137   *A_loc = aloc[0];
5138   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5139   if (!row) {
5140     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5141   }
5142   if (!col) {
5143     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5144   }
5145   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 /*@C
5150     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5151 
5152     Collective on Mat
5153 
5154    Input Parameters:
5155 +    A,B - the matrices in mpiaij format
5156 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5157 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5158 
5159    Output Parameters:
5160 +    rowb, colb - index sets of rows and columns of B to extract
5161 -    B_seq - the sequential matrix generated
5162 
5163     Level: developer
5164 
5165 @*/
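/*
   Example usage (an illustrative sketch, not code from this file): gather, as a sequential
   matrix, the rows of B matching the nonzero columns of the local part of A, keeping the
   index sets so a later call can reuse them after the values of B change.

      IS  rowb = NULL,colb = NULL;
      Mat Bseq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ... values of B change ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/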
5166 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5167 {
5168   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5169   PetscErrorCode ierr;
5170   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5171   IS             isrowb,iscolb;
5172   Mat            *bseq=NULL;
5173 
5174   PetscFunctionBegin;
5175   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5176     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5177   }
5178   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5179 
5180   if (scall == MAT_INITIAL_MATRIX) {
5181     start = A->cmap->rstart;
5182     cmap  = a->garray;
5183     nzA   = a->A->cmap->n;
5184     nzB   = a->B->cmap->n;
5185     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5186     ncols = 0;
5187     for (i=0; i<nzB; i++) {  /* row < local row index */
5188       if (cmap[i] < start) idx[ncols++] = cmap[i];
5189       else break;
5190     }
5191     imark = i;
5192     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5193     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5194     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5195     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5196   } else {
5197     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5198     isrowb  = *rowb; iscolb = *colb;
5199     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5200     bseq[0] = *B_seq;
5201   }
5202   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5203   *B_seq = bseq[0];
5204   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5205   if (!rowb) {
5206     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5207   } else {
5208     *rowb = isrowb;
5209   }
5210   if (!colb) {
5211     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5212   } else {
5213     *colb = iscolb;
5214   }
5215   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5216   PetscFunctionReturn(0);
5217 }
5218 
5219 /*
5220     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5221     of the OFF-DIAGONAL portion of local A
5222 
5223     Collective on Mat
5224 
5225    Input Parameters:
5226 +    A,B - the matrices in mpiaij format
5227 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5228 
5229    Output Parameters:
5230 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5231 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5232 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or NULL)
5233 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5234 
5235     Level: developer
5236 
5237 */
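/*
   Example usage (an illustrative sketch, not code from this file): the start arrays and the
   value buffer produced by the first call are handed back on later calls so that only the
   numerical values are re-communicated.

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... values of B change ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/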
5238 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5239 {
5240   VecScatter_MPI_General *gen_to,*gen_from;
5241   PetscErrorCode         ierr;
5242   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5243   Mat_SeqAIJ             *b_oth;
5244   VecScatter             ctx;
5245   MPI_Comm               comm;
5246   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5247   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5248   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5249   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5250   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5251   MPI_Request            *rwaits = NULL,*swaits = NULL;
5252   MPI_Status             *sstatus,rstatus;
5253   PetscMPIInt            jj,size;
5254   VecScatterType         type;
5255   PetscBool              mpi1;
5256 
5257   PetscFunctionBegin;
5258   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5259   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5260 
5261   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5262     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5263   }
5264   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5265   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5266 
5267   if (size == 1) {
5268     startsj_s = NULL;
5269     bufa_ptr  = NULL;
5270     *B_oth    = NULL;
5271     PetscFunctionReturn(0);
5272   }
5273 
5274   ctx = a->Mvctx;
5275   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5276   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5277   if (!mpi1) {
5278     /* a->Mvctx is not of type MPI1, the only type implemented for these Mat-Mat ops,
5279      thus create a->Mvctx_mpi1 */
5280     if (!a->Mvctx_mpi1) {
5281       a->Mvctx_mpi1_flg = PETSC_TRUE;
5282       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5283     }
5284     ctx = a->Mvctx_mpi1;
5285   }
5286   tag = ((PetscObject)ctx)->tag;
5287 
5288   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5289   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5290   nrecvs   = gen_from->n;
5291   nsends   = gen_to->n;
5292 
5293   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5294   srow    = gen_to->indices;    /* local row index to be sent */
5295   sstarts = gen_to->starts;
5296   sprocs  = gen_to->procs;
5297   sstatus = gen_to->sstatus;
5298   sbs     = gen_to->bs;
5299   rstarts = gen_from->starts;
5300   rprocs  = gen_from->procs;
5301   rbs     = gen_from->bs;
5302 
5303   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5304   if (scall == MAT_INITIAL_MATRIX) {
5305     /* i-array */
5306     /*---------*/
5307     /*  post receives */
5308     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5309     for (i=0; i<nrecvs; i++) {
5310       rowlen = rvalues + rstarts[i]*rbs;
5311       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5312       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5313     }
5314 
5315     /* pack the outgoing message */
5316     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5317 
5318     sstartsj[0] = 0;
5319     rstartsj[0] = 0;
5320     len         = 0; /* total length of j or a array to be sent */
5321     k           = 0;
5322     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5323     for (i=0; i<nsends; i++) {
5324       rowlen = svalues + sstarts[i]*sbs;
5325       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5326       for (j=0; j<nrows; j++) {
5327         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5328         for (l=0; l<sbs; l++) {
5329           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5330 
5331           rowlen[j*sbs+l] = ncols;
5332 
5333           len += ncols;
5334           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5335         }
5336         k++;
5337       }
5338       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5339 
5340       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5341     }
5342     /* recvs and sends of i-array are completed */
5343     i = nrecvs;
5344     while (i--) {
5345       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5346     }
5347     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5348     ierr = PetscFree(svalues);CHKERRQ(ierr);
5349 
5350     /* allocate buffers for sending j and a arrays */
5351     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5352     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5353 
5354     /* create i-array of B_oth */
5355     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5356 
5357     b_othi[0] = 0;
5358     len       = 0; /* total length of j or a array to be received */
5359     k         = 0;
5360     for (i=0; i<nrecvs; i++) {
5361       rowlen = rvalues + rstarts[i]*rbs;
5362       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5363       for (j=0; j<nrows; j++) {
5364         b_othi[k+1] = b_othi[k] + rowlen[j];
5365         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5366         k++;
5367       }
5368       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5369     }
5370     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5371 
5372     /* allocate space for j and a arrays of B_oth */
5373     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5374     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5375 
5376     /* j-array */
5377     /*---------*/
5378     /*  post receives of j-array */
5379     for (i=0; i<nrecvs; i++) {
5380       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5381       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5382     }
5383 
5384     /* pack the outgoing message j-array */
5385     k = 0;
5386     for (i=0; i<nsends; i++) {
5387       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5388       bufJ  = bufj+sstartsj[i];
5389       for (j=0; j<nrows; j++) {
5390         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5391         for (ll=0; ll<sbs; ll++) {
5392           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5393           for (l=0; l<ncols; l++) {
5394             *bufJ++ = cols[l];
5395           }
5396           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5397         }
5398       }
5399       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5400     }
5401 
5402     /* recvs and sends of j-array are completed */
5403     i = nrecvs;
5404     while (i--) {
5405       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5406     }
5407     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5408   } else if (scall == MAT_REUSE_MATRIX) {
5409     sstartsj = *startsj_s;
5410     rstartsj = *startsj_r;
5411     bufa     = *bufa_ptr;
5412     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5413     b_otha   = b_oth->a;
5414   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5415 
5416   /* a-array */
5417   /*---------*/
5418   /*  post receives of a-array */
5419   for (i=0; i<nrecvs; i++) {
5420     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5421     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5422   }
5423 
5424   /* pack the outgoing message a-array */
5425   k = 0;
5426   for (i=0; i<nsends; i++) {
5427     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5428     bufA  = bufa+sstartsj[i];
5429     for (j=0; j<nrows; j++) {
5430       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5431       for (ll=0; ll<sbs; ll++) {
5432         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5433         for (l=0; l<ncols; l++) {
5434           *bufA++ = vals[l];
5435         }
5436         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5437       }
5438     }
5439     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5440   }
5441   /* recvs and sends of a-array are completed */
5442   i = nrecvs;
5443   while (i--) {
5444     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5445   }
5446   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5447   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5448 
5449   if (scall == MAT_INITIAL_MATRIX) {
5450     /* put together the new matrix */
5451     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5452 
5453     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5454     /* Since these are PETSc arrays, change flags to free them as necessary. */
5455     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5456     b_oth->free_a  = PETSC_TRUE;
5457     b_oth->free_ij = PETSC_TRUE;
5458     b_oth->nonew   = 0;
5459 
5460     ierr = PetscFree(bufj);CHKERRQ(ierr);
5461     if (!startsj_s || !bufa_ptr) {
5462       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5463       ierr = PetscFree(bufa);CHKERRQ(ierr);
5464     } else {
5465       *startsj_s = sstartsj;
5466       *startsj_r = rstartsj;
5467       *bufa_ptr  = bufa;
5468     }
5469   }
5470   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5471   PetscFunctionReturn(0);
5472 }
5473 
5474 /*@C
5475   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5476 
5477   Not Collective
5478 
5479   Input Parameter:
5480 . A - The matrix in mpiaij format
5481 
5482   Output Parameters:
5483 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5484 . colmap - A map from global column index to local index into lvec
5485 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5486 
5487   Level: developer
5488 
5489 @*/
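/*
   Example usage (an illustrative sketch, shown for the non-CTABLE configuration): inspect
   the scatter and work vector that MatMult() uses for the off-process part of the product.
   The returned objects are owned by the matrix and must not be destroyed by the caller.

      Vec        lvec;
      PetscInt   *colmap;
      VecScatter scatter;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/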
5490 #if defined(PETSC_USE_CTABLE)
5491 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5492 #else
5493 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5494 #endif
5495 {
5496   Mat_MPIAIJ *a;
5497 
5498   PetscFunctionBegin;
5499   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5500   PetscValidPointer(lvec, 2);
5501   PetscValidPointer(colmap, 3);
5502   PetscValidPointer(multScatter, 4);
5503   a = (Mat_MPIAIJ*) A->data;
5504   if (lvec) *lvec = a->lvec;
5505   if (colmap) *colmap = a->colmap;
5506   if (multScatter) *multScatter = a->Mvctx;
5507   PetscFunctionReturn(0);
5508 }
5509 
5510 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5512 #if defined(PETSC_HAVE_MKL_SPARSE)
5513 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5514 #endif
5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5516 #if defined(PETSC_HAVE_ELEMENTAL)
5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5518 #endif
5519 #if defined(PETSC_HAVE_HYPRE)
5520 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5521 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5522 #endif
5523 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5526 
5527 /*
5528     Computes C = A*B as (B'*A')' since computing the product directly with a dense left factor is untenable
5529 
5530                n                       p                          p
5531         (              )       (              )         (                  )
5532       m (      A       )  *  n (       B      )   =   m (         C        )
5533         (              )       (              )         (                  )
5534 
5535 */
5536 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5537 {
5538   PetscErrorCode ierr;
5539   Mat            At,Bt,Ct;
5540 
5541   PetscFunctionBegin;
5542   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5543   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5544   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5545   ierr = MatDestroy(&At);CHKERRQ(ierr);
5546   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5547   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5548   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5549   PetscFunctionReturn(0);
5550 }
5551 
5552 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5553 {
5554   PetscErrorCode ierr;
5555   PetscInt       m=A->rmap->n,n=B->cmap->n;
5556   Mat            Cmat;
5557 
5558   PetscFunctionBegin;
5559   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5560   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5561   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5562   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5563   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5564   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5565   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5566   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5567 
5568   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5569 
5570   *C = Cmat;
5571   PetscFunctionReturn(0);
5572 }
5573 
5574 /* ----------------------------------------------------------------*/
5575 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5576 {
5577   PetscErrorCode ierr;
5578 
5579   PetscFunctionBegin;
5580   if (scall == MAT_INITIAL_MATRIX) {
5581     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5582     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5583     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5584   }
5585   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5586   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5587   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5588   PetscFunctionReturn(0);
5589 }
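
/*
   Example usage (an illustrative sketch, not code from this file): with A of type MATMPIDENSE
   and B of type MATMPIAIJ, the user-level entry point is MatMatMult(), which dispatches to the
   dense-times-AIJ routines above through the composed functions registered in MatCreate_MPIAIJ().

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/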
5590 
5591 /*MC
5592    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5593 
5594    Options Database Keys:
5595 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5596 
5597   Level: beginner
5598 
5599 .seealso: MatCreateAIJ()
5600 M*/
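
/*
   Example usage (an illustrative sketch, not code from this file; m and n are local sizes
   chosen by the caller): the common way to obtain this type is the generic creation sequence,
   forcing the type and preallocating before inserting values.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/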
5601 
5602 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5603 {
5604   Mat_MPIAIJ     *b;
5605   PetscErrorCode ierr;
5606   PetscMPIInt    size;
5607 
5608   PetscFunctionBegin;
5609   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5610 
5611   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5612   B->data       = (void*)b;
5613   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5614   B->assembled  = PETSC_FALSE;
5615   B->insertmode = NOT_SET_VALUES;
5616   b->size       = size;
5617 
5618   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5619 
5620   /* build cache for off array entries formed */
5621   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5622 
5623   b->donotstash  = PETSC_FALSE;
5624   b->colmap      = 0;
5625   b->garray      = 0;
5626   b->roworiented = PETSC_TRUE;
5627 
5628   /* stuff used for matrix vector multiply */
5629   b->lvec  = NULL;
5630   b->Mvctx = NULL;
5631 
5632   /* stuff for MatGetRow() */
5633   b->rowindices   = 0;
5634   b->rowvalues    = 0;
5635   b->getrowactive = PETSC_FALSE;
5636 
5637   /* flexible pointer used in CUSP/CUSPARSE classes */
5638   b->spptr = NULL;
5639 
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5649 #if defined(PETSC_HAVE_MKL_SPARSE)
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5651 #endif
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5654 #if defined(PETSC_HAVE_ELEMENTAL)
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5656 #endif
5657 #if defined(PETSC_HAVE_HYPRE)
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5659 #endif
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5665 #if defined(PETSC_HAVE_HYPRE)
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5667 #endif
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5670   PetscFunctionReturn(0);
5671 }
5672 
5673 /*@C
5674      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5675          and "off-diagonal" parts of the matrix in CSR format.
5676 
5677    Collective on MPI_Comm
5678 
5679    Input Parameters:
5680 +  comm - MPI communicator
5681 .  m - number of local rows (Cannot be PETSC_DECIDE)
5682 .  n - This value should be the same as the local size used in creating the
5683        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5684        calculated if N is given). For square matrices n is almost always m.
5685 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5686 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5687 .   i - row indices for "diagonal" portion of matrix
5688 .   j - column indices for "diagonal" portion of matrix
5689 .   a - matrix values for "diagonal" portion of matrix
5690 .   oi - row indices for "off-diagonal" portion of matrix
5691 .   oj - column indices for "off-diagonal" portion of matrix
5692 -   oa - matrix values for "off-diagonal" portion of matrix
5693 
5694    Output Parameter:
5695 .   mat - the matrix
5696 
5697    Level: advanced
5698 
5699    Notes:
5700        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5701        must free the arrays once the matrix has been destroyed and not before.
5702 
5703        The i and j indices are 0 based
5704 
5705        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5706 
5707        This sets local rows and cannot be used to set off-processor values.
5708 
5709        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5710        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5711        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5712        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5713        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5714        communication if it is known that only local entries will be set.
5715 
5716 .keywords: matrix, aij, compressed row, sparse, parallel
5717 
5718 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5719           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5720 @*/
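/*
   Example usage (an illustrative sketch, not code from this file; the CSR arrays i,j,a and
   oi,oj,oa are placeholders that the caller owns and must keep alive until the matrix is
   destroyed):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ... use A ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
      ... now the caller may free i,j,a,oi,oj,oa ...
*/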
5721 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5722 {
5723   PetscErrorCode ierr;
5724   Mat_MPIAIJ     *maij;
5725 
5726   PetscFunctionBegin;
5727   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5728   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5729   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5730   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5731   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5732   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5733   maij = (Mat_MPIAIJ*) (*mat)->data;
5734 
5735   (*mat)->preallocated = PETSC_TRUE;
5736 
5737   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5738   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5739 
5740   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5741   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5742 
5743   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5744   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5745   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5747 
5748   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5749   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5750   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5751   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5752   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5753   PetscFunctionReturn(0);
5754 }
5755 
5756 /*
5757     Special version for direct calls from Fortran
5758 */
5759 #include <petsc/private/fortranimpl.h>
5760 
5761 /* Change these macros so they can be used in a void function */
5762 #undef CHKERRQ
5763 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5764 #undef SETERRQ2
5765 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5766 #undef SETERRQ3
5767 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5768 #undef SETERRQ
5769 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5770 
5771 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5772 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5773 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5774 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5775 #else
5776 #endif
5777 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5778 {
5779   Mat            mat  = *mmat;
5780   PetscInt       m    = *mm, n = *mn;
5781   InsertMode     addv = *maddv;
5782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5783   PetscScalar    value;
5784   PetscErrorCode ierr;
5785 
5786   MatCheckPreallocated(mat,1);
5787   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5788 
5789 #if defined(PETSC_USE_DEBUG)
5790   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5791 #endif
5792   {
5793     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5794     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5795     PetscBool roworiented = aij->roworiented;
5796 
5797     /* Some Variables required in the macro */
5798     Mat        A                 = aij->A;
5799     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5800     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5801     MatScalar  *aa               = a->a;
5802     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5803     Mat        B                 = aij->B;
5804     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5805     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5806     MatScalar  *ba               = b->a;
5807 
5808     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5809     PetscInt  nonew = a->nonew;
5810     MatScalar *ap1,*ap2;
5811 
5812     PetscFunctionBegin;
5813     for (i=0; i<m; i++) {
5814       if (im[i] < 0) continue;
5815 #if defined(PETSC_USE_DEBUG)
5816       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5817 #endif
5818       if (im[i] >= rstart && im[i] < rend) {
5819         row      = im[i] - rstart;
5820         lastcol1 = -1;
5821         rp1      = aj + ai[row];
5822         ap1      = aa + ai[row];
5823         rmax1    = aimax[row];
5824         nrow1    = ailen[row];
5825         low1     = 0;
5826         high1    = nrow1;
5827         lastcol2 = -1;
5828         rp2      = bj + bi[row];
5829         ap2      = ba + bi[row];
5830         rmax2    = bimax[row];
5831         nrow2    = bilen[row];
5832         low2     = 0;
5833         high2    = nrow2;
5834 
5835         for (j=0; j<n; j++) {
5836           if (roworiented) value = v[i*n+j];
5837           else value = v[i+j*m];
5838           if (in[j] >= cstart && in[j] < cend) {
5839             col = in[j] - cstart;
5840             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5841             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5842           } else if (in[j] < 0) continue;
5843 #if defined(PETSC_USE_DEBUG)
5844           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5845           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5846 #endif
5847           else {
5848             if (mat->was_assembled) {
5849               if (!aij->colmap) {
5850                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5851               }
5852 #if defined(PETSC_USE_CTABLE)
5853               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5854               col--;
5855 #else
5856               col = aij->colmap[in[j]] - 1;
5857 #endif
5858               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5859               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5860                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5861                 col  =  in[j];
5862                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5863                 B     = aij->B;
5864                 b     = (Mat_SeqAIJ*)B->data;
5865                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5866                 rp2   = bj + bi[row];
5867                 ap2   = ba + bi[row];
5868                 rmax2 = bimax[row];
5869                 nrow2 = bilen[row];
5870                 low2  = 0;
5871                 high2 = nrow2;
5872                 bm    = aij->B->rmap->n;
5873                 ba    = b->a;
5874               }
5875             } else col = in[j];
5876             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5877           }
5878         }
5879       } else if (!aij->donotstash) {
5880         if (roworiented) {
5881           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5882         } else {
5883           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5884         }
5885       }
5886     }
5887   }
5888   PetscFunctionReturnVoid();
5889 }
5890 
5891