xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a8661f626dbc2b20047248f09ebecfa904ec1319)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
/*
   Creates an IS (in keptrows) listing, in global numbering, the local rows
   that contain at least one numerically nonzero stored entry in either the
   diagonal (A) or off-diagonal (B) part.  If no process has any zero rows,
   *keptrows is left NULL and no IS is created.

   Collective: performs an all-reduce to count zero rows globally.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count in cnt the local rows that are entirely zero */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];   /* stored entries of row i in the diagonal part */
    nb = ib[i+1] - ib[i];   /* stored entries of row i in the off-diagonal part */
    if (!na && !nb) {
      cnt++;                /* structurally empty row */
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;  /* nonzero found: row is kept, not counted */
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;                  /* entries exist but all values are exactly zero */
ok1:;
  }
  /* n0rows = total number of zero rows across all processes */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  /* second pass: record the global index of each row with a nonzero value */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only process 0 holds the full sequential matrix, so only it checks the type */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the distributed matrix with m local rows and columns (square) */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* NOTE(review): gmat is queried on every rank here but only process 0's
       block sizes survive the broadcast -- confirm all ranks pass a valid gmat */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every process's row count, then prefix-sum into ownership ranges */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      /* ld[i] = entries of local row i strictly left of the diagonal block;
         it is stashed in the matrix below for the MAT_REUSE_MATRIX path */
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 reads its own slice directly out of the sequential matrix */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens holds total row lengths, so subtract olens to
       get the diagonal-block lengths expected by the preallocation routines */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* only non-root ranks allocated receive buffers */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* keep the left-of-diagonal counts for later MAT_REUSE_MATRIX calls */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* each row arrives contiguous and column-sorted; ld[i] entries go to B
       (left of the diagonal block), then the A entries, then the rest of B */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal part of row i-1 plus left-of-diagonal part of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    /* remaining right-of-diagonal part of the last row */
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array but is fast to access).
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
/*
   Inserts (or adds, depending on addv) 'value' at local position (row,col) of
   the diagonal block A.  Performs a short bisection followed by a linear scan
   over the current row; if the entry is not found and insertion is permitted
   (nonew), the row storage is grown via MatSeqXAIJReallocateAIJ() and the
   later entries are shifted up to make room.  (orow,ocol) are the original
   global indices, used only in the error message.  Relies on the caller
   (MatSetValues_MPIAIJ) having initialized rp1, ap1, nrow1, low1, high1,
   lastcol1, rmax1 and the other macro-shared variables for the target row.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
463 
/*
   Companion of MatSetValues_SeqAIJ_A_Private() for the off-diagonal block B.
   Identical search/insert logic operating on the rp2/ap2/nrow2/low2/high2/
   lastcol2/rmax2 set of macro-shared variables.  Note the zero-value skip
   here does not exclude the diagonal ('row != col' test is absent), since B
   holds no diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
498 
/*
   Replaces the stored values of one locally owned row with the values in v.

   v must hold exactly the row's stored entries ordered by increasing global
   column index.  Since the matrix is square (row ownership range equals the
   diagonal column range), the row splits into three contiguous pieces:
   off-diagonal entries left of the diagonal block, the diagonal block, and
   the off-diagonal entries to the right.

   NOTE(review): assumes garray[b->j[...]] is increasing within each row of B
   so that the split point l is well defined -- confirm for assembled MPIAIJ.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert the global row number to local numbering */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
524 
/*
   MatSetValues for MPIAIJ: inserts/adds the m x n logically dense block of
   values v at global rows im[] and columns in[] (negative indices ignored).

   Locally owned rows are split on the fly between the diagonal block A
   (columns in [cstart,cend)) and the off-diagonal block B (everything else);
   the actual insertion is done by the MatSetValues_SeqAIJ_A_Private()/
   ..._B_Private() macros, which consume the many row-search variables set up
   below.  Rows owned by other processes are buffered in the stash and moved
   during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative rows are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the search state for both A and B macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v is either row-major (roworiented) or column-major */
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          /* column lands in the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal block B */
          if (mat->was_assembled) {
            /* after assembly B's columns are compressed: translate the global
               column index to B's local numbering via the colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* brand-new off-process column: revert B to global numbering so
                 the entry can be inserted; this invalidates the macro state */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              /* new nonzero not allowed: either skip quietly (nonew==1) or error */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];   /* before first assembly B uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* row owned by another process: stash it for communication at assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;   /* note: aliases aijd (same object) */
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;   /* note: aliases aijo (same object) */
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend; /* this rank's owned column range */
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;      /* number of locally owned rows */
  PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    /* starting offsets of row j inside the preallocated diagonal/off-diagonal CSR arrays */
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        /* diagonal-block entry: store column index shifted to local numbering */
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        /* NOTE(review): global column index is stored here; presumably compacted to
           local numbering during assembly - confirm against MatSetUpMultiply_MPIAIJ */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    /* record the actual per-row fill of the diagonal and off-diagonal parts */
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}
722 
/*
   Retrieves a dense m x n subblock of values (row-major into v) from the parallel
   matrix. Only rows owned by this process may be requested; columns outside the
   diagonal block are looked up via the compact off-diagonal column map (colmap),
   and absent entries are returned as 0.0. Negative row/column indices are skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global row -> local row of the diagonal block */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block: direct local lookup in aij->A */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: map global index to compact local index via colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          /* colmap stores index+1 so that 0 can mean "not present" */
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* entry not present in the off-diagonal block: value is structurally zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
/*
   Completes assembly: drains the stash of off-process entries into the matrix,
   assembles the diagonal and off-diagonal sequential blocks, handles collective
   disassembly, sets up the multiply machinery on first final assembly, and
   updates the global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive and insert all stashed off-process entries, message by message */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank is still assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* first final assembly: build the scatter/ghost-vector machinery for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* row-access workspace may be stale after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* cached diagonal is invalidated by new values */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
/*
   Zeroes the given (global) rows of the parallel matrix, optionally placing
   'diag' on the diagonal and fixing the right-hand side b so that the solution
   keeps the values given in x for those rows. Rows not owned locally are mapped
   to their owners by MatZeroRowsMapLocal_Private.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* b[row] = diag * x[row] so that the zeroed equations are satisfied by x */
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the diagonal entry lives in mat->A, set it there directly */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: the "diagonal" entry may fall in the off-diagonal block,
       so insert it with MatSetValues and reassemble */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
910 
/*
   Zeroes the given (global) rows AND the corresponding columns of the parallel
   matrix, optionally placing 'diag' on the diagonal and adjusting b for the
   values in x. Row ownership is resolved with a star forest; the zeroed columns
   of the off-diagonal block are found by scattering a 0/1 mask of zeroed rows
   into the ghost layout.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not zeroed" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  /* in-place: lrows[0..len) becomes the sorted list of local rows to zero */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a global 0/1 mask of zeroed rows and scatter it to the ghost (column) layout */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* bring in the ghost values of x needed to correct b for the zeroed columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* rindex maps compressed rows back to true local rows */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed: move its contribution (value * x) to the RHS, then drop it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
/*
   yy = A^T * xx. The off-diagonal transpose product is computed into the ghost
   vector and reverse-scattered (added) into yy; the order of the local product
   relative to the scatter calls depends on whether the scatter begin/end are
   merged into one step.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatter adds into yy during Begin(), so the local product must come first */
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1095 
/*
   Tests whether Bmat equals the transpose of Amat (to tolerance tol).
   First a cheap local test on the diagonal blocks; if that passes (and the
   communicator has more than one rank) the off-diagonal blocks are extracted
   as submatrices and compared, which is comparatively expensive.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* single rank: there is no off-diagonal part, the diagonal test is conclusive */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global column indices outside this rank's owned range [first,last) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* A(Me,Notme) must be the transpose of B(Notme,Me) */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1135 
1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1137 {
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* send it on its way */
1154   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1157   /* receive remote parts */
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1167 {
1168   PetscErrorCode ierr;
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170 
1171   PetscFunctionBegin;
1172   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1173   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1174   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1179 {
1180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1185   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
/*
   Destroys the parallel matrix: frees the stash, the two sequential blocks,
   the column map and ghost-vector/scatter machinery, the private data struct,
   and finally clears the type name and all composed methods so the Mat shell
   can be retyped.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and detach every function composed on this type */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1235 
1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1237 {
1238   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1239   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1240   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1241   PetscErrorCode ierr;
1242   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1243   int            fd;
1244   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1245   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1246   PetscScalar    *column_values;
1247   PetscInt       message_count,flowcontrolcount;
1248   FILE           *file;
1249 
1250   PetscFunctionBegin;
1251   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1252   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1253   nz   = A->nz + B->nz;
1254   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1255   if (!rank) {
1256     header[0] = MAT_FILE_CLASSID;
1257     header[1] = mat->rmap->N;
1258     header[2] = mat->cmap->N;
1259 
1260     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     /* get largest number of rows any processor has */
1263     rlen  = mat->rmap->n;
1264     range = mat->rmap->range;
1265     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1266   } else {
1267     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     rlen = mat->rmap->n;
1269   }
1270 
1271   /* load up the local row counts */
1272   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1273   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1274 
1275   /* store the row lengths to the file */
1276   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     for (i=1; i<size; i++) {
1280       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1281       rlen = range[i+1] - range[i];
1282       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     }
1285     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1286   } else {
1287     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1288     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1290   }
1291   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1292 
1293   /* load up the local column indices */
1294   nzmax = nz; /* th processor needs space a largest processor needs */
1295   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1297   cnt   = 0;
1298   for (i=0; i<mat->rmap->n; i++) {
1299     for (j=B->i[i]; j<B->i[i+1]; j++) {
1300       if ((col = garray[B->j[j]]) > cstart) break;
1301       column_indices[cnt++] = col;
1302     }
1303     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1304     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1305   }
1306   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1307 
1308   /* store the column indices to the file */
1309   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1310   if (!rank) {
1311     MPI_Status status;
1312     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     for (i=1; i<size; i++) {
1314       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1315       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1316       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1317       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1319     }
1320     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1321   } else {
1322     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1323     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1324     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1325     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1326   }
1327   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1328 
1329   /* load up the local column values */
1330   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1331   cnt  = 0;
1332   for (i=0; i<mat->rmap->n; i++) {
1333     for (j=B->i[i]; j<B->i[i+1]; j++) {
1334       if (garray[B->j[j]] > cstart) break;
1335       column_values[cnt++] = B->a[j];
1336     }
1337     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1338     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1339   }
1340   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1341 
1342   /* store the column values to the file */
1343   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1344   if (!rank) {
1345     MPI_Status status;
1346     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1350       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1351       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1352       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1353     }
1354     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1355   } else {
1356     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1357     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1359     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1360   }
1361   ierr = PetscFree(column_values);CHKERRQ(ierr);
1362 
1363   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1364   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #include <petscdraw.h>
/*
   Viewer backend for MPIAIJ matrices covering ASCII, binary, and draw viewers.

   The ASCII info formats (load balance, info, detailed info, factor info) are
   handled by per-rank statistics and early returns; all remaining cases fall
   through to the final block, which gathers the entire matrix onto rank 0 as a
   temporary MATMPIAIJ and views its sequential diagonal part there.

   Collective on mat; every rank must call (viewer operations and the gather
   below are synchronizing).
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather the per-rank nonzero counts (diagonal + off-diagonal blocks) and report min/avg/max */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;  /* integer average over ranks */
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics printed in rank order via synchronized printing */
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored AIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block IS the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    /* temporarily shift the stored column indices to global numbering ... */
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* ... and shift them back so aij->A is left unmodified */
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;  /* keep the base pointer; cols is advanced in the loop below */
    /* garray maps compressed off-diagonal column indices to global column numbers */
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1513 
1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1515 {
1516   PetscErrorCode ierr;
1517   PetscBool      iascii,isdraw,issocket,isbinary;
1518 
1519   PetscFunctionBegin;
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1521   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1524   if (iascii || isdraw || isbinary || issocket) {
1525     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
/*
   SOR/relaxation for MPIAIJ matrices.  Only "local" variants are supported:
   each outer iteration scatters the current solution's ghost values, forms the
   locally modified right-hand side bb1 = bb - B*x (B = off-diagonal block),
   and runs SOR on the local diagonal block A.  A fully parallel SOR sweep is
   rejected with an error at the end of the if/else chain.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;  /* work RHS; only allocated when actually needed below */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* triangular application is purely local: delegate to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* precedence note: `~flag & SOR_ZERO_INITIAL_GUESS` is `(~flag) & bit`, i.e. true
     when the zero-initial-guess bit is NOT set; bb1 is needed for multiple
     iterations, a nonzero initial guess, or the Eisenstat trick */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost update since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward half-sweep with zero initial guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily build and cache the diagonal for the Eisenstat correction */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any factorization error (e.g. zero pivot) detected in the local block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1629 
/*
   Permute rows and columns of a parallel AIJ matrix: B = P_r * A * P_c.

   rowp/colp give, for each local row/column, its destination index.  Two
   PetscSF objects invert these permutations so each rank learns where its
   rows, columns, and ghost columns (garray) land, then the preallocation
   counts are computed locally and broadcast to the owning ranks before the
   values are inserted with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros per (source) row for preallocation of Aperm */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the ranks owning the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never assigned non-NULL in this function, so this
     destroy appears to be dead code — confirm whether sequential-colp handling
     was removed intentionally */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscReal      isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set = PETSC_TRUE;
1825     A->spd     = flg;
1826     if (flg) {
1827       A->symmetric                  = PETSC_TRUE;
1828       A->structurally_symmetric     = PETSC_TRUE;
1829       A->symmetric_set              = PETSC_TRUE;
1830       A->structurally_symmetric_set = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     MatCheckPreallocated(A,1);
1835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1836     break;
1837   case MAT_STRUCTURALLY_SYMMETRIC:
1838     MatCheckPreallocated(A,1);
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_HERMITIAN:
1842     MatCheckPreallocated(A,1);
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_SYMMETRY_ETERNAL:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     break;
1849   case MAT_SUBMAT_SINGLEIS:
1850     A->submat_singleis = flg;
1851     break;
1852   case MAT_STRUCTURE_ONLY:
1853     /* The option is handled directly by MatSetOption() */
1854     break;
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
/*
   Return one locally owned row of an MPIAIJ matrix in global column numbering.

   The row is the merge of the corresponding rows of the diagonal block A and
   the off-diagonal block B.  Since A's columns all lie in [cstart,cend) and
   both blocks store their rows sorted, the merged row is: B entries whose
   global column is < cstart, then all A entries, then the remaining B entries.
   Results are copied into per-matrix scratch arrays (rowvalues/rowindices),
   so only one row may be "gotten" at a time (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for (NULL suppresses output) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;  /* compressed off-diagonal column -> global column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;  /* number of leading B entries with global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1938 
1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1940 {
1941   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1942 
1943   PetscFunctionBegin;
1944   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1945   aij->getrowactive = PETSC_FALSE;
1946   PetscFunctionReturn(0);
1947 }
1948 
/*
   Compute a norm of an MPIAIJ matrix.  Supported types:
     NORM_FROBENIUS - sum of |a_ij|^2 over both local blocks, allreduced, sqrt;
     NORM_1         - maximum column sum (per-column sums allreduced);
     NORM_INFINITY  - maximum row sum (local max, then allreduce with MAX).
   The two-norm is not supported.  With a single rank everything is delegated
   to the sequential diagonal block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over the diagonal block ... */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      /* ... and the off-diagonal block */
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp holds a full-length (global N) per-column absolute sum */
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps compressed off-diagonal indices to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
2015 
/*
   Transpose a parallel AIJ matrix.

   The diagonal block is transposed locally via MatTranspose() on the
   sequential block; the off-diagonal block's entries are inserted column-wise
   with MatSetValues() (their destination rows live on other ranks).  For
   MAT_INITIAL_MATRIX (or in-place, *matout == A) the preallocation of the
   result is computed first: column counts of the local blocks give d_nnz,
   and a PetscSF reduces the off-diagonal counts onto the owning ranks for
   o_nnz.  In-place transpose replaces A's header via MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
  PetscErrorCode ierr;
  Mat            B,A_diag,*B_diag;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* the transpose has A's column layout for rows and row layout for columns */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b = B->data;  /* implicit void* -> Mat_MPIAIJ* conversion */
  A_diag = a->A;
  B_diag = &b->A;
  sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i = sub_B_diag->i;
  /* mark every row of B's diagonal block as full so the in-place transpose below can write directly */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }
  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of the off-diagonal block as column `row` of the transpose */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and destroy the temporary header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2099 
2100 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2101 {
2102   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2103   Mat            a    = aij->A,b = aij->B;
2104   PetscErrorCode ierr;
2105   PetscInt       s1,s2,s3;
2106 
2107   PetscFunctionBegin;
2108   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2109   if (rr) {
2110     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2111     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2112     /* Overlap communication with computation. */
2113     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114   }
2115   if (ll) {
2116     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2117     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2118     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2119   }
2120   /* scale  the diagonal block */
2121   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2122 
2123   if (rr) {
2124     /* Do a scatter end and then right scale the off-diagonal block */
2125     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2126     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2132 {
2133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2142 {
2143   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2144   Mat            a,b,c,d;
2145   PetscBool      flg;
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   a = matA->A; b = matA->B;
2150   c = matB->A; d = matB->B;
2151 
2152   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2153   if (flg) {
2154     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2155   }
2156   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2161 {
2162   PetscErrorCode ierr;
2163   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2164   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2165 
2166   PetscFunctionBegin;
2167   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2168   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2169     /* because of the column compression in the off-processor part of the matrix a->B,
2170        the number of columns in a->B and b->B may be different, hence we cannot call
2171        the MatCopy() directly on the two parts. If need be, we can provide a more
2172        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2173        then copying the submatrices */
2174     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2175   } else {
2176     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2177     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2178   }
2179   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2184 {
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 /*
2193    Computes the number of nonzeros per row needed for preallocation when X and Y
2194    have different nonzero structure.
2195 */
2196 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2197 {
2198   PetscInt       i,j,k,nzx,nzy;
2199 
2200   PetscFunctionBegin;
2201   /* Set the number of nonzeros in the new matrix */
2202   for (i=0; i<m; i++) {
2203     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2204     nzx = xi[i+1] - xi[i];
2205     nzy = yi[i+1] - yi[i];
2206     nnz[i] = 0;
2207     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2208       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2209       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2210       nnz[i]++;
2211     }
2212     for (; k<nzy; k++) nnz[i]++;
2213   }
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2218 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2219 {
2220   PetscErrorCode ierr;
2221   PetscInt       m = Y->rmap->N;
2222   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2223   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2224 
2225   PetscFunctionBegin;
2226   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
/* Computes Y = Y + a*X for MPIAIJ matrices, choosing a strategy by pattern relation. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* Identical sparsity: the value arrays of corresponding blocks line up
       entry-for-entry, so the update reduces to two raw BLAS axpy calls
       (one for the diagonal block, one for the off-diagonal block). */
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* values changed without going through MatSetValues; bump the state manually */
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* Unrelated patterns: preallocate a fresh matrix B with the union
       sparsity of X and Y, compute the sum into it, then swap B's guts
       into Y so callers keep their existing Mat handle. */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    /* diagonal blocks share local column numbering; off-diagonal blocks need
       their local-to-global maps (garray) to merge compressed column indices */
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2271 
2272 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2273 
2274 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2275 {
2276 #if defined(PETSC_USE_COMPLEX)
2277   PetscErrorCode ierr;
2278   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2282   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2283 #else
2284   PetscFunctionBegin;
2285 #endif
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2290 {
2291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2296   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2301 {
2302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2307   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2308   PetscFunctionReturn(0);
2309 }
2310 
/* For each local row, find the entry of largest absolute value; optionally
   return its global column index in idx[]. */
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* row-wise max |value| over the diagonal block; idx[] (if requested)
     receives local column indices within a->A */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift local diagonal-block column indices to global numbering; rows with
       an all-zero diagonal block (va[i]==0) are left untouched here */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* repeat for the off-diagonal block into a scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* merge: keep whichever block produced the larger magnitude; garray maps
     compressed off-diagonal column indices back to global columns */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2348 
2349 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2350 {
2351   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2352   PetscErrorCode ierr;
2353   PetscInt       i,*idxb = 0;
2354   PetscScalar    *va,*vb;
2355   Vec            vtmp;
2356 
2357   PetscFunctionBegin;
2358   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2359   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2360   if (idx) {
2361     for (i=0; i<A->cmap->n; i++) {
2362       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2363     }
2364   }
2365 
2366   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2367   if (idx) {
2368     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2369   }
2370   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2371   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2372 
2373   for (i=0; i<A->rmap->n; i++) {
2374     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2375       va[i] = vb[i];
2376       if (idx) idx[i] = a->garray[idxb[i]];
2377     }
2378   }
2379 
2380   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2381   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2382   ierr = PetscFree(idxb);CHKERRQ(ierr);
2383   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2388 {
2389   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2390   PetscInt       n      = A->rmap->n;
2391   PetscInt       cstart = A->cmap->rstart;
2392   PetscInt       *cmap  = mat->garray;
2393   PetscInt       *diagIdx, *offdiagIdx;
2394   Vec            diagV, offdiagV;
2395   PetscScalar    *a, *diagA, *offdiagA;
2396   PetscInt       r;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2402   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2403   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2404   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   for (r = 0; r < n; ++r) {
2409     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2410       a[r]   = diagA[r];
2411       idx[r] = cstart + diagIdx[r];
2412     } else {
2413       a[r]   = offdiagA[r];
2414       idx[r] = cmap[offdiagIdx[r]];
2415     }
2416   }
2417   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2419   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2420   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2421   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2422   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2427 {
2428   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2429   PetscInt       n      = A->rmap->n;
2430   PetscInt       cstart = A->cmap->rstart;
2431   PetscInt       *cmap  = mat->garray;
2432   PetscInt       *diagIdx, *offdiagIdx;
2433   Vec            diagV, offdiagV;
2434   PetscScalar    *a, *diagA, *offdiagA;
2435   PetscInt       r;
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2440   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2441   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2442   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2443   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2445   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2446   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   for (r = 0; r < n; ++r) {
2448     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2449       a[r]   = diagA[r];
2450       idx[r] = cstart + diagIdx[r];
2451     } else {
2452       a[r]   = offdiagA[r];
2453       idx[r] = cmap[offdiagIdx[r]];
2454     }
2455   }
2456   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2457   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2459   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2460   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2461   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2466 {
2467   PetscErrorCode ierr;
2468   Mat            *dummy;
2469 
2470   PetscFunctionBegin;
2471   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2472   *newmat = *dummy;
2473   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2478 {
2479   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2480   PetscErrorCode ierr;
2481 
2482   PetscFunctionBegin;
2483   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2484   A->factorerrortype = a->A->factorerrortype;
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2489 {
2490   PetscErrorCode ierr;
2491   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2492 
2493   PetscFunctionBegin;
2494   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2495   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2496   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2497   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2502 {
2503   PetscFunctionBegin;
2504   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2505   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 /*@
2510    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2511 
2512    Collective on Mat
2513 
2514    Input Parameters:
2515 +    A - the matrix
2516 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2517 
   Level: advanced
2519 
2520 @*/
2521 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2522 {
2523   PetscErrorCode       ierr;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2531 {
2532   PetscErrorCode       ierr;
2533   PetscBool            sc = PETSC_FALSE,flg;
2534 
2535   PetscFunctionBegin;
2536   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2537   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2538   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2539   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2540   if (flg) {
2541     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2542   }
2543   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2548 {
2549   PetscErrorCode ierr;
2550   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2551   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2552 
2553   PetscFunctionBegin;
2554   if (!Y->preallocated) {
2555     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2556   } else if (!aij->nz) {
2557     PetscInt nonew = aij->nonew;
2558     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2559     aij->nonew = nonew;
2560   }
2561   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2566 {
2567   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2572   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2573   if (d) {
2574     PetscInt rstart;
2575     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2576     *d += rstart;
2577 
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 
/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ.  The slot numbers in the interior
   comments correspond to the MatOperation enumeration; a 0 entry means the
   operation is not implemented for this matrix type.  Do NOT reorder entries:
   position determines which operation each function implements. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
2730 
2731 /* ----------------------------------------------------------------------------------------*/
2732 
2733 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2734 {
2735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2740   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2745 {
2746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2751   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 
2755 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2756 {
2757   Mat_MPIAIJ     *b;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2762   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2763   b = (Mat_MPIAIJ*)B->data;
2764 
2765 #if defined(PETSC_USE_CTABLE)
2766   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2767 #else
2768   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2769 #endif
2770   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2771   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2772   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2773 
2774   /* Because the B will have been resized we simply destroy it and create a new one each time */
2775   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2776   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2777   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2778   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2779   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2780   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2781 
2782   if (!B->preallocated) {
2783     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2784     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2785     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2786     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2787     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2788   }
2789 
2790   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2791   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2792   B->preallocated  = PETSC_TRUE;
2793   B->was_assembled = PETSC_FALSE;
2794   B->assembled     = PETSC_FALSE;;
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2799 {
2800   Mat_MPIAIJ     *b;
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2805   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2807   b = (Mat_MPIAIJ*)B->data;
2808 
2809 #if defined(PETSC_USE_CTABLE)
2810   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2811 #else
2812   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2813 #endif
2814   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2815   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2816   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2817 
2818   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2819   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2820   B->preallocated  = PETSC_TRUE;
2821   B->was_assembled = PETSC_FALSE;
2822   B->assembled = PETSC_FALSE;
2823   PetscFunctionReturn(0);
2824 }
2825 
/*
   MatDuplicate_MPIAIJ - Creates a new MATMPIAIJ matrix with the same layout,
   type and communication structures as matin; values are copied or not
   according to cpvalues (passed through to MatDuplicate() of the blocks).

   The duplicate shares the row/column PetscLayouts by reference and deep-copies
   the colmap/garray index data and the scatter contexts.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the full operations table so subtype overrides (e.g. inode variants) survive */
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* the duplicate is created already assembled */
  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state is per-matrix; start fresh */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* layouts are shared (reference counted), not copied */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* deep-copy the global-to-local column map of the off-diagonal block, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* deep-copy the local-to-global column map (garray) of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the ghost-value work vector and the scatter used by MatMult() */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* optional MPI1 scatter variant is only copied when the source has one */
  if (oldmat->Mvctx_mpi1) {
    ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2892 
/*
   MatLoad_MPIAIJ - Loads a MATMPIAIJ matrix from a PETSc binary viewer.

   Protocol: rank 0 reads the header, all row lengths, column indices and
   numerical values from the file and ships each other rank its contiguous
   slab of rows (MPIULong_Send/Recv); every rank then inserts its own rows
   with MatSetValues_MPIAIJ() and the matrix is assembled.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only rank 0 touches the file; header = {classid, M, N, total nnz or flag} */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* everyone needs the global sizes; header[0] (classid) need not be broadcast */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  /* rowners[i+1] receives rank i's local row count; prefix-summed below */
  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  /* turn the counts into ownership offsets: rowners[i] = first row of rank i
     (rowners[1] already holds rank 0's count and rowners[0] is 0, so the scan starts at 2) */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    /* exclusive prefix sum of local column counts gives cstart */
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* temporarily turn ourlens into per-row *diagonal-block* counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3094 
3095 /* Not scalable because of ISAllGather() unless getting all columns. */
3096 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3097 {
3098   PetscErrorCode ierr;
3099   IS             iscol_local;
3100   PetscBool      isstride;
3101   PetscMPIInt    lisstride=0,gisstride;
3102 
3103   PetscFunctionBegin;
3104   /* check if we are grabbing all columns*/
3105   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3106 
3107   if (isstride) {
3108     PetscInt  start,len,mstart,mlen;
3109     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3110     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3111     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3112     if (mstart == start && mlen-mstart == len) lisstride = 1;
3113   }
3114 
3115   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3116   if (gisstride) {
3117     PetscInt N;
3118     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3119     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3120     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3121     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3122   } else {
3123     PetscInt cbs;
3124     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3125     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3126     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3127   }
3128 
3129   *isseq = iscol_local;
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 /*
3134  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3135  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3136 
3137  Input Parameters:
3138    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3141    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3142            i.e., mat->cstart <= iscol[i] < mat->cend
3143  Output Parameter:
3144    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3145    iscol_o - sequential column index set for retrieving mat->B
3146    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3147  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     selected positions carry their own global column index, unselected stay -1 */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices: exclusive prefix sum of local iscol sizes */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d; IS takes ownership of idx (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d: local row numbers relative to this process's row start */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec still -1 were not selected by any process */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* caller owns cmap1 and must PetscFree() it */
  *garray = cmap1;

  /* lvec belongs to the matrix and is not destroyed here */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3245 
3246 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3247 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3248 {
3249   PetscErrorCode ierr;
3250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3251   Mat            M = NULL;
3252   MPI_Comm       comm;
3253   IS             iscol_d,isrow_d,iscol_o;
3254   Mat            Asub = NULL,Bsub = NULL;
3255   PetscInt       n;
3256 
3257   PetscFunctionBegin;
3258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3259 
3260   if (call == MAT_REUSE_MATRIX) {
3261     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3262     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3263     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3264 
3265     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3266     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3267 
3268     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3269     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3270 
3271     /* Update diagonal and off-diagonal portions of submat */
3272     asub = (Mat_MPIAIJ*)(*submat)->data;
3273     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3274     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3275     if (n) {
3276       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3277     }
3278     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280 
3281   } else { /* call == MAT_INITIAL_MATRIX) */
3282     const PetscInt *garray;
3283     PetscInt        BsubN;
3284 
3285     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3286     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3287 
3288     /* Create local submatrices Asub and Bsub */
3289     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3290     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3291 
3292     /* Create submatrix M */
3293     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3294 
3295     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3296     asub = (Mat_MPIAIJ*)M->data;
3297 
3298     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3299     n = asub->B->cmap->N;
3300     if (BsubN > n) {
3301       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3302       const PetscInt *idx;
3303       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3304       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3305 
3306       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3307       j = 0;
3308       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3309       for (i=0; i<n; i++) {
3310         if (j >= BsubN) break;
3311         while (subgarray[i] > garray[j]) j++;
3312 
3313         if (subgarray[i] == garray[j]) {
3314           idx_new[i] = idx[j++];
3315         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3316       }
3317       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3318 
3319       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3320       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3321 
3322     } else if (BsubN < n) {
3323       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3324     }
3325 
3326     ierr = PetscFree(garray);CHKERRQ(ierr);
3327     *submat = M;
3328 
3329     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3330     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3331     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3332 
3333     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3334     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3335 
3336     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3337     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3338   }
3339   PetscFunctionReturn(0);
3340 }
3341 
/*
   MatCreateSubMatrix_MPIAIJ - Dispatch routine for extracting mat[isrow,iscol].

   Tries the cheap paths first: if isrow (and possibly iscol) are distributed
   the same way as mat, the SameRowColDist/SameRowDist variants avoid the
   ISAllGather()-based general algorithm, which needs a globally sized iscol.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which fast path was taken initially is recorded via composed objects on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices must fall within this process's ownership range */
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the fast paths require agreement on every process */
    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
        /* not sorted: fall through to the general algorithm, reusing iscol_local below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS    iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* compose iscol_local on newmat so MAT_REUSE_MATRIX can find it */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3446 
3447 /*@C
3448      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3449          and "off-diagonal" part of the matrix in CSR format.
3450 
3451    Collective on MPI_Comm
3452 
3453    Input Parameters:
3454 +  comm - MPI communicator
3455 .  A - "diagonal" portion of matrix
3456 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3457 -  garray - global index of B columns
3458 
3459    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3462 
3463    Notes:
3464        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3465        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3466 
3467 .seealso: MatCreateMPIAIJWithSplitArrays()
3468 @*/
3469 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3470 {
3471   PetscErrorCode ierr;
3472   Mat_MPIAIJ     *maij;
3473   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3474   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3475   PetscScalar    *oa=b->a;
3476   Mat            Bnew;
3477   PetscInt       m,n,N;
3478 
3479   PetscFunctionBegin;
3480   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3481   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3482   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3483   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3484   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3485   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3486 
3487   /* Get global columns of mat */
3488   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3489 
3490   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3491   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3492   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3493   maij = (Mat_MPIAIJ*)(*mat)->data;
3494 
3495   (*mat)->preallocated = PETSC_TRUE;
3496 
3497   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3498   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3499 
3500   /* Set A as diagonal portion of *mat */
3501   maij->A = A;
3502 
3503   nz = oi[m];
3504   for (i=0; i<nz; i++) {
3505     col   = oj[i];
3506     oj[i] = garray[col];
3507   }
3508 
3509    /* Set Bnew as off-diagonal portion of *mat */
3510   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3511   bnew        = (Mat_SeqAIJ*)Bnew->data;
3512   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3513   maij->B     = Bnew;
3514 
3515   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3516 
3517   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3518   b->free_a       = PETSC_FALSE;
3519   b->free_ij      = PETSC_FALSE;
3520   ierr = MatDestroy(&B);CHKERRQ(ierr);
3521 
3522   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3523   bnew->free_a       = PETSC_TRUE;
3524   bnew->free_ij      = PETSC_TRUE;
3525 
3526   /* condense columns of maij->B */
3527   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3528   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3529   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3530   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3531   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3536 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts the parallel submatrix mat[isrow,iscol]
   for the case where the row distribution of the result matches that of mat, so no rows
   move between processes.

   Input:
.  iscol_local - sequential IS holding every requested column index on this process
                 (already gathered); may contain duplicates, and the merge loop below
                 assumes it is sorted (see the comment at step (2)).

   On MAT_INITIAL_MATRIX the sequential submatrix Msub and the index sets iscol_sub/iscmap
   are stashed on *newmat with PetscObjectCompose() so that a later MAT_REUSE_MATRIX call
   can refill the same structure without recomputing it.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the work objects stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* Refill the cached sequential submatrix in place */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      /* All columns requested: reuse iscol_local itself and an identity column map */
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* Merge the sorted requested columns against [cstart,cend) and the sorted garray
         (global indices of the off-diagonal block); k is the cursor into garray */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      /* iscol_sub = requested columns actually present on this process (PETSC_OWN_POINTER: the IS takes ownership of idx) */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      /* iscmap maps local submatrix columns back to positions in the requested column list */
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread Ncols columns as evenly as possible over the processes */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation split into the two length arrays */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* cmap translates Msub's column to the new global column; classify vs [rstart,rend) */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  /* jj/aa walk Msub's CSR arrays row by row; colsub holds the row's translated columns */
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* Compose then destroy: *newmat keeps the only reference to each cached object */
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3745 
3746 /*
3747     Not great since it makes two copies of the submatrix, first an SeqAIJ
3748   in local and then by concatenating the local matrices the end result.
3749   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3750 
3751   Note: This requires a sequential iscol with all indices.
3752 */
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - extracts mat[isrow,iscol] by first building a
   sequential submatrix (Mreuse) on each process and then scattering its rows into a new
   parallel matrix.  csize is the requested local column size of the result, or
   PETSC_DECIDE.  On MAT_INITIAL_MATRIX, Mreuse is cached on the result via
   PetscObjectCompose("SubMatrix") so MAT_REUSE_MATRIX calls can refill it.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call ==  MAT_REUSE_MATRIX) {
    /* Refill the sequential submatrix cached on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread n columns as evenly as possible over the processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation split into the two length arrays */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* classify each entry against the diagonal column range [rstart,rend) */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* Walk Mreuse's CSR arrays row by row and insert each row into the parallel matrix */
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* Compose then destroy: *newmat keeps the only reference to Mreuse */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3874 
3875 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3876 {
3877   PetscInt       m,cstart, cend,j,nnz,i,d;
3878   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3879   const PetscInt *JJ;
3880   PetscScalar    *values;
3881   PetscErrorCode ierr;
3882   PetscBool      nooffprocentries;
3883 
3884   PetscFunctionBegin;
3885   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3886 
3887   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3888   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3889   m      = B->rmap->n;
3890   cstart = B->cmap->rstart;
3891   cend   = B->cmap->rend;
3892   rstart = B->rmap->rstart;
3893 
3894   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3895 
3896 #if defined(PETSC_USE_DEBUG)
3897   for (i=0; i<m; i++) {
3898     nnz = Ii[i+1]- Ii[i];
3899     JJ  = J + Ii[i];
3900     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3901     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3902     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3903   }
3904 #endif
3905 
3906   for (i=0; i<m; i++) {
3907     nnz     = Ii[i+1]- Ii[i];
3908     JJ      = J + Ii[i];
3909     nnz_max = PetscMax(nnz_max,nnz);
3910     d       = 0;
3911     for (j=0; j<nnz; j++) {
3912       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3913     }
3914     d_nnz[i] = d;
3915     o_nnz[i] = nnz - d;
3916   }
3917   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3918   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3919 
3920   if (v) values = (PetscScalar*)v;
3921   else {
3922     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3923   }
3924 
3925   for (i=0; i<m; i++) {
3926     ii   = i + rstart;
3927     nnz  = Ii[i+1]- Ii[i];
3928     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3929   }
3930   nooffprocentries    = B->nooffprocentries;
3931   B->nooffprocentries = PETSC_TRUE;
3932   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   B->nooffprocentries = nooffprocentries;
3935 
3936   if (!v) {
3937     ierr = PetscFree(values);CHKERRQ(ierr);
3938   }
3939   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 /*@
3944    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3945    (the default parallel PETSc format).
3946 
3947    Collective on MPI_Comm
3948 
3949    Input Parameters:
3950 +  B - the matrix
3951 .  i - the indices into j for the start of each local row (starts with zero)
3952 .  j - the column indices for each local row (starts with zero)
3953 -  v - optional values in the matrix
3954 
3955    Level: developer
3956 
3957    Notes:
3958        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3959      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3960      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3961 
3962        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3963 
3964        The format which is used for the sparse matrix input, is equivalent to a
3965     row-major ordering.. i.e for the following matrix, the input data expected is
3966     as shown
3967 
3968 $        1 0 0
3969 $        2 0 3     P0
3970 $       -------
3971 $        4 5 6     P1
3972 $
3973 $     Process0 [P0]: rows_owned=[0,1]
3974 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3975 $        j =  {0,0,2}  [size = 3]
3976 $        v =  {1,2,3}  [size = 3]
3977 $
3978 $     Process1 [P1]: rows_owned=[2]
3979 $        i =  {0,3}    [size = nrow+1  = 1+1]
3980 $        j =  {0,1,2}  [size = 3]
3981 $        v =  {4,5,6}  [size = 3]
3982 
3983 .keywords: matrix, aij, compressed row, sparse, parallel
3984 
3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3986           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3987 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation registered as "MatMPIAIJSetPreallocationCSR_C"
     (MatMPIAIJSetPreallocationCSR_MPIAIJ for MPIAIJ); PetscTryMethod is a no-op if the
     matrix type does not provide this method */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3996 
3997 /*@C
3998    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3999    (the default parallel PETSc format).  For good matrix assembly performance
4000    the user should preallocate the matrix storage by setting the parameters
4001    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4002    performance can be increased by more than a factor of 50.
4003 
4004    Collective on MPI_Comm
4005 
4006    Input Parameters:
4007 +  B - the matrix
4008 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4009            (same value is used for all local rows)
4010 .  d_nnz - array containing the number of nonzeros in the various rows of the
4011            DIAGONAL portion of the local submatrix (possibly different for each row)
4012            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4013            The size of this array is equal to the number of local rows, i.e 'm'.
4014            For matrices that will be factored, you must leave room for (and set)
4015            the diagonal entry even if it is zero.
4016 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4017            submatrix (same value is used for all local rows).
4018 -  o_nnz - array containing the number of nonzeros in the various rows of the
4019            OFF-DIAGONAL portion of the local submatrix (possibly different for
4020            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4021            structure. The size of this array is equal to the number
4022            of local rows, i.e 'm'.
4023 
4024    If the *_nnz parameter is given then the *_nz parameter is ignored
4025 
4026    The AIJ format (also called the Yale sparse matrix format or
4027    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4028    storage.  The stored row and column indices begin with zero.
4029    See Users-Manual: ch_mat for details.
4030 
4031    The parallel matrix is partitioned such that the first m0 rows belong to
4032    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4033    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4034 
4035    The DIAGONAL portion of the local submatrix of a processor can be defined
4036    as the submatrix which is obtained by extraction the part corresponding to
4037    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4038    first row that belongs to the processor, r2 is the last row belonging to
4039    the this processor, and c1-c2 is range of indices of the local part of a
4040    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4041    common case of a square matrix, the row and column ranges are the same and
4042    the DIAGONAL part is also square. The remaining portion of the local
4043    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4044 
4045    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4046 
4047    You can call MatGetInfo() to get information on how effective the preallocation was;
4048    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4049    You can also run with the option -info and look for messages with the string
4050    malloc in them to see if additional memory allocation was needed.
4051 
4052    Example usage:
4053 
4054    Consider the following 8x8 matrix with 34 non-zero values, that is
4055    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4056    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4057    as follows:
4058 
4059 .vb
4060             1  2  0  |  0  3  0  |  0  4
4061     Proc0   0  5  6  |  7  0  0  |  8  0
4062             9  0 10  | 11  0  0  | 12  0
4063     -------------------------------------
4064            13  0 14  | 15 16 17  |  0  0
4065     Proc1   0 18  0  | 19 20 21  |  0  0
4066             0  0  0  | 22 23  0  | 24  0
4067     -------------------------------------
4068     Proc2  25 26 27  |  0  0 28  | 29  0
4069            30  0  0  | 31 32 33  |  0 34
4070 .ve
4071 
4072    This can be represented as a collection of submatrices as:
4073 
4074 .vb
4075       A B C
4076       D E F
4077       G H I
4078 .ve
4079 
4080    Where the submatrices A,B,C are owned by proc0, D,E,F are
4081    owned by proc1, G,H,I are owned by proc2.
4082 
4083    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4084    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4085    The 'M','N' parameters are 8,8, and have the same values on all procs.
4086 
4087    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4088    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4089    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4090    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4091    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4093 
4094    When d_nz, o_nz parameters are specified, d_nz storage elements are
4095    allocated for every row of the local diagonal submatrix, and o_nz
4096    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4098    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4099    In this case, the values of d_nz,o_nz are:
4100 .vb
4101      proc0 : dnz = 2, o_nz = 2
4102      proc1 : dnz = 3, o_nz = 2
4103      proc2 : dnz = 1, o_nz = 4
4104 .ve
4105    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4106    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4108    34 values.
4109 
4110    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4112    In the above case the values for d_nnz,o_nnz are:
4113 .vb
4114      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4115      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4116      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4117 .ve
4118    Here the space allocated is sum of all the above values i.e 34, and
4119    hence pre-allocation is perfect.
4120 
4121    Level: intermediate
4122 
4123 .keywords: matrix, aij, compressed row, sparse, parallel
4124 
4125 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4126           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4127 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the matrix object before dispatching */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the type-specific implementation registered as "MatMPIAIJSetPreallocation_C";
     PetscTryMethod is a no-op if the matrix type does not provide this method */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4138 
4139 /*@
4140      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4141          CSR format the local rows.
4142 
4143    Collective on MPI_Comm
4144 
4145    Input Parameters:
4146 +  comm - MPI communicator
4147 .  m - number of local rows (Cannot be PETSC_DECIDE)
4148 .  n - This value should be the same as the local size used in creating the
4149        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4150        calculated if N is given) For square matrices n is almost always m.
4151 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4152 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4153 .   i - row indices
4154 .   j - column indices
4155 -   a - matrix values
4156 
4157    Output Parameter:
4158 .   mat - the matrix
4159 
4160    Level: intermediate
4161 
4162    Notes:
4163        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4164      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4165      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4166 
4167        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4168 
4169        The format which is used for the sparse matrix input, is equivalent to a
4170     row-major ordering.. i.e for the following matrix, the input data expected is
4171     as shown
4172 
4173 $        1 0 0
4174 $        2 0 3     P0
4175 $       -------
4176 $        4 5 6     P1
4177 $
4178 $     Process0 [P0]: rows_owned=[0,1]
4179 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4180 $        j =  {0,0,2}  [size = 3]
4181 $        v =  {1,2,3}  [size = 3]
4182 $
4183 $     Process1 [P1]: rows_owned=[2]
4184 $        i =  {0,3}    [size = nrow+1  = 1+1]
4185 $        j =  {0,1,2}  [size = 3]
4186 $        v =  {4,5,6}  [size = 3]
4187 
4188 .keywords: matrix, aij, compressed row, sparse, parallel
4189 
4190 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4191           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4192 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* The CSR row offsets must be zero-based, and the local row count must be explicit
     (the row layout cannot be derived here) */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* Copies i/j/a into the internal AIJ storage, preallocates, and assembles the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4207 
4208 /*@C
4209    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4210    (the default parallel PETSc format).  For good matrix assembly performance
4211    the user should preallocate the matrix storage by setting the parameters
4212    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4213    performance can be increased by more than a factor of 50.
4214 
4215    Collective on MPI_Comm
4216 
4217    Input Parameters:
4218 +  comm - MPI communicator
4219 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4220            This value should be the same as the local size used in creating the
4221            y vector for the matrix-vector product y = Ax.
4222 .  n - This value should be the same as the local size used in creating the
4223        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4224        calculated if N is given) For square matrices n is almost always m.
4225 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4226 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4227 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4228            (same value is used for all local rows)
4229 .  d_nnz - array containing the number of nonzeros in the various rows of the
4230            DIAGONAL portion of the local submatrix (possibly different for each row)
4231            or NULL, if d_nz is used to specify the nonzero structure.
4232            The size of this array is equal to the number of local rows, i.e 'm'.
4233 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4234            submatrix (same value is used for all local rows).
4235 -  o_nnz - array containing the number of nonzeros in the various rows of the
4236            OFF-DIAGONAL portion of the local submatrix (possibly different for
4237            each row) or NULL, if o_nz is used to specify the nonzero
4238            structure. The size of this array is equal to the number
4239            of local rows, i.e 'm'.
4240 
4241    Output Parameter:
4242 .  A - the matrix
4243 
4244    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4246    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4247 
4248    Notes:
4249    If the *_nnz parameter is given then the *_nz parameter is ignored
4250 
4251    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4252    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4253    storage requirements for this matrix.
4254 
4255    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4257    that argument.
4258 
4259    The user MUST specify either the local or global matrix dimensions
4260    (possibly both).
4261 
4262    The parallel matrix is partitioned across processors such that the
4263    first m0 rows belong to process 0, the next m1 rows belong to
4264    process 1, the next m2 rows belong to process 2 etc.. where
4265    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4266    values corresponding to [m x N] submatrix.
4267 
4268    The columns are logically partitioned with the n0 columns belonging
4269    to 0th partition, the next n1 columns belonging to the next
4270    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4271 
4272    The DIAGONAL portion of the local submatrix on any given processor
4273    is the submatrix corresponding to the rows and columns m,n
4274    corresponding to the given processor. i.e diagonal matrix on
4275    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4276    etc. The remaining portion of the local submatrix [m x (N-n)]
4277    constitute the OFF-DIAGONAL portion. The example below better
4278    illustrates this concept.
4279 
4280    For a square global matrix we define each processor's diagonal portion
4281    to be its local rows and the corresponding columns (a square submatrix);
4282    each processor's off-diagonal portion encompasses the remainder of the
4283    local matrix (a rectangular submatrix).
4284 
4285    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4286 
4287    When calling this routine with a single process communicator, a matrix of
4288    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4289    type of communicator, use the construction mechanism
4290 .vb
4291      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4292 .ve
4293 
4294 $     MatCreate(...,&A);
4295 $     MatSetType(A,MATMPIAIJ);
4296 $     MatSetSizes(A, m,n,M,N);
4297 $     MatMPIAIJSetPreallocation(A,...);
4298 
4299    By default, this format uses inodes (identical nodes) when possible.
4300    We search for consecutive rows with the same nonzero structure, thereby
4301    reusing matrix information to achieve increased efficiency.
4302 
4303    Options Database Keys:
4304 +  -mat_no_inode  - Do not use inodes
4305 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4306 
4307 
4308 
4309    Example usage:
4310 
4311    Consider the following 8x8 matrix with 34 non-zero values, that is
4312    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4313    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4314    as follows
4315 
4316 .vb
4317             1  2  0  |  0  3  0  |  0  4
4318     Proc0   0  5  6  |  7  0  0  |  8  0
4319             9  0 10  | 11  0  0  | 12  0
4320     -------------------------------------
4321            13  0 14  | 15 16 17  |  0  0
4322     Proc1   0 18  0  | 19 20 21  |  0  0
4323             0  0  0  | 22 23  0  | 24  0
4324     -------------------------------------
4325     Proc2  25 26 27  |  0  0 28  | 29  0
4326            30  0  0  | 31 32 33  |  0 34
4327 .ve
4328 
4329    This can be represented as a collection of submatrices as
4330 
4331 .vb
4332       A B C
4333       D E F
4334       G H I
4335 .ve
4336 
4337    Where the submatrices A,B,C are owned by proc0, D,E,F are
4338    owned by proc1, G,H,I are owned by proc2.
4339 
4340    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4341    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4342    The 'M','N' parameters are 8,8, and have the same values on all procs.
4343 
4344    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4345    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4346    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4347    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4348    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4350 
4351    When d_nz, o_nz parameters are specified, d_nz storage elements are
4352    allocated for every row of the local diagonal submatrix, and o_nz
4353    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4355    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4356    In this case, the values of d_nz,o_nz are
4357 .vb
4358      proc0 : dnz = 2, o_nz = 2
4359      proc1 : dnz = 3, o_nz = 2
4360      proc2 : dnz = 1, o_nz = 4
4361 .ve
4362    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4363    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4365    34 values.
4366 
4367    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4369    In the above case the values for d_nnz,o_nnz are
4370 .vb
4371      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4372      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4373      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4374 .ve
4375    Here the space allocated is sum of all the above values i.e 34, and
4376    hence pre-allocation is perfect.
4377 
4378    Level: intermediate
4379 
4380 .keywords: matrix, aij, compressed row, sparse, parallel
4381 
4382 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4383           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4384 @*/
4385 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4386 {
4387   PetscErrorCode ierr;
4388   PetscMPIInt    size;
4389 
4390   PetscFunctionBegin;
4391   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4392   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4393   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4394   if (size > 1) {
4395     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4396     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4397   } else {
4398     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4399     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4400   }
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4405 {
4406   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4407   PetscBool      flg;
4408   PetscErrorCode ierr;
4409 
4410   PetscFunctionBegin;
4411   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4412   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4413   if (Ad)     *Ad     = a->A;
4414   if (Ao)     *Ao     = a->B;
4415   if (colmap) *colmap = a->garray;
4416   PetscFunctionReturn(0);
4417 }
4418 
/*
    MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Stacks the per-process sequential
    matrices inmat into one parallel AIJ matrix: this process's m rows of inmat
    become global rows [rstart,rstart+m) of *outmat, where rstart is the prefix
    sum of the local row counts over lower ranks.

    Collective on comm.

    Input:  comm   - communicator the parallel matrix will live on
            inmat  - this process's sequential contribution (accessed via MatGetRow_SeqAIJ)
            n      - number of local columns of *outmat (or PETSC_DECIDE)
            scall  - MAT_INITIAL_MATRIX (create *outmat) or MAT_REUSE_MATRIX (refill existing)
    Output: outmat - the assembled parallel matrix
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* first global row owned by this process: prefix sum of local row counts */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros of every local row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    /* MATAIJ resolves to SeqAIJ on one process and MPIAIJ otherwise; both
       preallocation calls are issued and only the matching one takes effect */
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: copy this process's rows of inmat into the global rows it owns */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4470 
4471 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4472 {
4473   PetscErrorCode    ierr;
4474   PetscMPIInt       rank;
4475   PetscInt          m,N,i,rstart,nnz;
4476   size_t            len;
4477   const PetscInt    *indx;
4478   PetscViewer       out;
4479   char              *name;
4480   Mat               B;
4481   const PetscScalar *values;
4482 
4483   PetscFunctionBegin;
4484   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4485   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4486   /* Should this be the type of the diagonal block of A? */
4487   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4488   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4489   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4490   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4491   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4492   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4493   for (i=0; i<m; i++) {
4494     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4495     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4496     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4497   }
4498   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4499   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500 
4501   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4502   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4503   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4504   sprintf(name,"%s.%d",outfile,rank);
4505   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4506   ierr = PetscFree(name);CHKERRQ(ierr);
4507   ierr = MatView(B,out);CHKERRQ(ierr);
4508   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4509   ierr = MatDestroy(&B);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4514 {
4515   PetscErrorCode      ierr;
4516   Mat_Merge_SeqsToMPI *merge;
4517   PetscContainer      container;
4518 
4519   PetscFunctionBegin;
4520   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4521   if (container) {
4522     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4535     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4536     ierr = PetscFree(merge);CHKERRQ(ierr);
4537     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4538   }
4539   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4540   PetscFunctionReturn(0);
4541 }
4542 
4543 #include <../src/mat/utils/freespace.h>
4544 #include <petscbt.h>
4545 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ():
   fills the values of mpimat, whose nonzero structure and merge scaffolding
   ("MatMergeSeqsToMPI" container) were built by MatCreateMPIAIJSumSeqAIJSymbolic().
   Each process sends the values of the seqmat rows owned by other processes to
   their owners, then sums its own rows together with all received contributions.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge scaffolding attached to mpimat by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values of the rows owned by [proc] are contiguous in aa; send them in one message */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  /* ba_i is a scratch row of length N (the widest any row can be) */
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;  /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge: aj is a subset of bj_i, both sorted; advance j until columns match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] holds the contiguous storage of all received value buffers */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4662 
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ():
   determines the nonzero structure of the parallel sum of the per-process
   sequential matrices seqmat, creates the (unassembled) MATMPIAIJ matrix *mpimat
   with that preallocation, and attaches a Mat_Merge_SeqsToMPI container holding
   the merged row structure and communication buffers for reuse by
   MatCreateMPIAIJSumSeqAIJNumeric().

   NOTE(review): seqmat is assumed to have the same dimensions on every process
   (see the MatCreateMPIAIJSumSeqAIJ notes); M,N below are taken from the local
   seqmat sizes under that assumption.
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;      /* local row count decided by the layout */
  owners = merge->rowmap->range;  /* owners[p]..owners[p+1] = global rows owned by p */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* count only nonempty rows; empty rows are omitted from the i-structure message */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of the rows owned by [proc] are contiguous in aj */
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list (sorted set of column indices per row) */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* flatten the accumulated column indices into the contiguous bj array */
  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4913 
4914 /*@C
4915       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4916                  matrices from each processor
4917 
4918     Collective on MPI_Comm
4919 
4920    Input Parameters:
4921 +    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrix on each process (all must have the same dimensions)
4923 .    m - number of local rows (or PETSC_DECIDE)
4924 .    n - number of local columns (or PETSC_DECIDE)
4925 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4926 
4927    Output Parameter:
4928 .    mpimat - the parallel matrix generated
4929 
4930     Level: advanced
4931 
4932    Notes:
4933      The dimensions of the sequential matrix in each processor MUST be the same.
4934      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4935      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4936 @*/
4937 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4938 {
4939   PetscErrorCode ierr;
4940   PetscMPIInt    size;
4941 
4942   PetscFunctionBegin;
4943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4944   if (size == 1) {
4945     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4946     if (scall == MAT_INITIAL_MATRIX) {
4947       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4948     } else {
4949       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4950     }
4951     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4952     PetscFunctionReturn(0);
4953   }
4954   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4955   if (scall == MAT_INITIAL_MATRIX) {
4956     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4957   }
4958   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4959   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4960   PetscFunctionReturn(0);
4961 }
4962 
4963 /*@
4964      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4965           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4966           with MatGetSize()
4967 
4968     Not Collective
4969 
4970    Input Parameters:
4971 +    A - the matrix
4972 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4973 
4974    Output Parameter:
4975 .    A_loc - the local sequential matrix generated
4976 
4977     Level: developer
4978 
4979 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4980 
4981 @*/
4982 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4983 {
4984   PetscErrorCode ierr;
4985   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4986   Mat_SeqAIJ     *mat,*a,*b;
4987   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4988   MatScalar      *aa,*ba,*cam;
4989   PetscScalar    *ca;
4990   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4991   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4992   PetscBool      match;
4993   MPI_Comm       comm;
4994   PetscMPIInt    size;
4995 
4996   PetscFunctionBegin;
4997   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4998   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4999   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5001   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5002 
5003   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5004   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5005   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5006   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5007   aa = a->a; ba = b->a;
5008   if (scall == MAT_INITIAL_MATRIX) {
5009     if (size == 1) {
5010       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5011       PetscFunctionReturn(0);
5012     }
5013 
5014     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5015     ci[0] = 0;
5016     for (i=0; i<am; i++) {
5017       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5018     }
5019     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5020     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5021     k    = 0;
5022     for (i=0; i<am; i++) {
5023       ncols_o = bi[i+1] - bi[i];
5024       ncols_d = ai[i+1] - ai[i];
5025       /* off-diagonal portion of A */
5026       for (jo=0; jo<ncols_o; jo++) {
5027         col = cmap[*bj];
5028         if (col >= cstart) break;
5029         cj[k]   = col; bj++;
5030         ca[k++] = *ba++;
5031       }
5032       /* diagonal portion of A */
5033       for (j=0; j<ncols_d; j++) {
5034         cj[k]   = cstart + *aj++;
5035         ca[k++] = *aa++;
5036       }
5037       /* off-diagonal portion of A */
5038       for (j=jo; j<ncols_o; j++) {
5039         cj[k]   = cmap[*bj++];
5040         ca[k++] = *ba++;
5041       }
5042     }
5043     /* put together the new matrix */
5044     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5045     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5046     /* Since these are PETSc arrays, change flags to free them as necessary. */
5047     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5048     mat->free_a  = PETSC_TRUE;
5049     mat->free_ij = PETSC_TRUE;
5050     mat->nonew   = 0;
5051   } else if (scall == MAT_REUSE_MATRIX) {
5052     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5053     ci = mat->i; cj = mat->j; cam = mat->a;
5054     for (i=0; i<am; i++) {
5055       /* off-diagonal portion of A */
5056       ncols_o = bi[i+1] - bi[i];
5057       for (jo=0; jo<ncols_o; jo++) {
5058         col = cmap[*bj];
5059         if (col >= cstart) break;
5060         *cam++ = *ba++; bj++;
5061       }
5062       /* diagonal portion of A */
5063       ncols_d = ai[i+1] - ai[i];
5064       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5065       /* off-diagonal portion of A */
5066       for (j=jo; j<ncols_o; j++) {
5067         *cam++ = *ba++; bj++;
5068       }
5069     }
5070   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5071   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5072   PetscFunctionReturn(0);
5073 }
5074 
5075 /*@C
5076      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5077 
5078     Not Collective
5079 
5080    Input Parameters:
5081 +    A - the matrix
5082 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5083 -    row, col - index sets of rows and columns to extract (or NULL)
5084 
5085    Output Parameter:
5086 .    A_loc - the local sequential matrix generated
5087 
5088     Level: developer
5089 
5090 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5091 
5092 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* Default row set: this process's owned rows */
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* Default column set: the columns that are actually nonzero locally, in
     ascending global order: off-diag columns < cstart, then the diagonal
     block's columns, then off-diag columns >= cstart (garray is sorted) */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;  /* first off-diag column at or beyond the diagonal block */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    /* PETSC_OWN_POINTER: iscola takes ownership of idx */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* On reuse, MatCreateSubMatrices expects the previous submatrix in aloc[0] */
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  /* Destroy only the index sets we created; composing above kept a reference to iscola */
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5149 
5150 /*@C
5151     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5152 
5153     Collective on Mat
5154 
5155    Input Parameters:
5156 +    A,B - the matrices in mpiaij format
5157 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5159 
5160    Output Parameter:
5161 +    rowb, colb - index sets of rows and columns of B to extract
5162 -    B_seq - the sequential matrix generated
5163 
5164     Level: developer
5165 
5166 @*/
5167 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5168 {
5169   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5170   PetscErrorCode ierr;
5171   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5172   IS             isrowb,iscolb;
5173   Mat            *bseq=NULL;
5174 
5175   PetscFunctionBegin;
5176   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5177     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5178   }
5179   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5180 
5181   if (scall == MAT_INITIAL_MATRIX) {
5182     start = A->cmap->rstart;
5183     cmap  = a->garray;
5184     nzA   = a->A->cmap->n;
5185     nzB   = a->B->cmap->n;
5186     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5187     ncols = 0;
5188     for (i=0; i<nzB; i++) {  /* row < local row index */
5189       if (cmap[i] < start) idx[ncols++] = cmap[i];
5190       else break;
5191     }
5192     imark = i;
5193     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5194     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5195     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5196     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5197   } else {
5198     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5199     isrowb  = *rowb; iscolb = *colb;
5200     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5201     bseq[0] = *B_seq;
5202   }
5203   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5204   *B_seq = bseq[0];
5205   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5206   if (!rowb) {
5207     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5208   } else {
5209     *rowb = isrowb;
5210   }
5211   if (!colb) {
5212     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5213   } else {
5214     *colb = iscolb;
5215   }
5216   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5217   PetscFunctionReturn(0);
5218 }
5219 
5220 /*
5221     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5222     of the OFF-DIAGONAL portion of local A
5223 
5224     Collective on Mat
5225 
5226    Input Parameters:
5227 +    A,B - the matrices in mpiaij format
5228 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5229 
5230    Output Parameter:
5231 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5232 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5233 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5234 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5235 
5236     Level: developer
5237 
5238 */
5239 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5240 {
5241   VecScatter_MPI_General *gen_to,*gen_from;
5242   PetscErrorCode         ierr;
5243   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5244   Mat_SeqAIJ             *b_oth;
5245   VecScatter             ctx;
5246   MPI_Comm               comm;
5247   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5248   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5249   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5250   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5251   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5252   MPI_Request            *rwaits = NULL,*swaits = NULL;
5253   MPI_Status             *sstatus,rstatus;
5254   PetscMPIInt            jj,size;
5255   VecScatterType         type;
5256   PetscBool              mpi1;
5257 
5258   PetscFunctionBegin;
5259   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5260   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5261 
5262   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5263     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5264   }
5265   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5266   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5267 
5268   if (size == 1) {
5269     startsj_s = NULL;
5270     bufa_ptr  = NULL;
5271     *B_oth    = NULL;
5272     PetscFunctionReturn(0);
5273   }
5274 
5275   ctx = a->Mvctx;
5276   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5277   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5278   if (!mpi1) {
5279     /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops,
5280      thus create a->Mvctx_mpi1 */
5281     if (!a->Mvctx_mpi1) {
5282       a->Mvctx_mpi1_flg = PETSC_TRUE;
5283       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5284     }
5285     ctx = a->Mvctx_mpi1;
5286   }
5287   tag = ((PetscObject)ctx)->tag;
5288 
5289   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5290   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5291   nrecvs   = gen_from->n;
5292   nsends   = gen_to->n;
5293 
5294   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5295   srow    = gen_to->indices;    /* local row index to be sent */
5296   sstarts = gen_to->starts;
5297   sprocs  = gen_to->procs;
5298   sstatus = gen_to->sstatus;
5299   sbs     = gen_to->bs;
5300   rstarts = gen_from->starts;
5301   rprocs  = gen_from->procs;
5302   rbs     = gen_from->bs;
5303 
5304   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5305   if (scall == MAT_INITIAL_MATRIX) {
5306     /* i-array */
5307     /*---------*/
5308     /*  post receives */
5309     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5310     for (i=0; i<nrecvs; i++) {
5311       rowlen = rvalues + rstarts[i]*rbs;
5312       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5313       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* pack the outgoing message */
5317     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5318 
5319     sstartsj[0] = 0;
5320     rstartsj[0] = 0;
5321     len         = 0; /* total length of j or a array to be sent */
5322     k           = 0;
5323     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5324     for (i=0; i<nsends; i++) {
5325       rowlen = svalues + sstarts[i]*sbs;
5326       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5327       for (j=0; j<nrows; j++) {
5328         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5329         for (l=0; l<sbs; l++) {
5330           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5331 
5332           rowlen[j*sbs+l] = ncols;
5333 
5334           len += ncols;
5335           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5336         }
5337         k++;
5338       }
5339       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5340 
5341       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5342     }
5343     /* recvs and sends of i-array are completed */
5344     i = nrecvs;
5345     while (i--) {
5346       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5347     }
5348     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5349     ierr = PetscFree(svalues);CHKERRQ(ierr);
5350 
5351     /* allocate buffers for sending j and a arrays */
5352     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5353     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5354 
5355     /* create i-array of B_oth */
5356     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5357 
5358     b_othi[0] = 0;
5359     len       = 0; /* total length of j or a array to be received */
5360     k         = 0;
5361     for (i=0; i<nrecvs; i++) {
5362       rowlen = rvalues + rstarts[i]*rbs;
5363       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5364       for (j=0; j<nrows; j++) {
5365         b_othi[k+1] = b_othi[k] + rowlen[j];
5366         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5367         k++;
5368       }
5369       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5370     }
5371     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5372 
5373     /* allocate space for j and a arrrays of B_oth */
5374     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5375     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5376 
5377     /* j-array */
5378     /*---------*/
5379     /*  post receives of j-array */
5380     for (i=0; i<nrecvs; i++) {
5381       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5382       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5383     }
5384 
5385     /* pack the outgoing message j-array */
5386     k = 0;
5387     for (i=0; i<nsends; i++) {
5388       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5389       bufJ  = bufj+sstartsj[i];
5390       for (j=0; j<nrows; j++) {
5391         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5392         for (ll=0; ll<sbs; ll++) {
5393           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5394           for (l=0; l<ncols; l++) {
5395             *bufJ++ = cols[l];
5396           }
5397           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5398         }
5399       }
5400       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5401     }
5402 
5403     /* recvs and sends of j-array are completed */
5404     i = nrecvs;
5405     while (i--) {
5406       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5407     }
5408     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5409   } else if (scall == MAT_REUSE_MATRIX) {
5410     sstartsj = *startsj_s;
5411     rstartsj = *startsj_r;
5412     bufa     = *bufa_ptr;
5413     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5414     b_otha   = b_oth->a;
5415   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5416 
5417   /* a-array */
5418   /*---------*/
5419   /*  post receives of a-array */
5420   for (i=0; i<nrecvs; i++) {
5421     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5422     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5423   }
5424 
5425   /* pack the outgoing message a-array */
5426   k = 0;
5427   for (i=0; i<nsends; i++) {
5428     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5429     bufA  = bufa+sstartsj[i];
5430     for (j=0; j<nrows; j++) {
5431       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5432       for (ll=0; ll<sbs; ll++) {
5433         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5434         for (l=0; l<ncols; l++) {
5435           *bufA++ = vals[l];
5436         }
5437         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5438       }
5439     }
5440     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5441   }
5442   /* recvs and sends of a-array are completed */
5443   i = nrecvs;
5444   while (i--) {
5445     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5446   }
5447   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5448   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5449 
5450   if (scall == MAT_INITIAL_MATRIX) {
5451     /* put together the new matrix */
5452     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5453 
5454     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5455     /* Since these are PETSc arrays, change flags to free them as necessary. */
5456     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5457     b_oth->free_a  = PETSC_TRUE;
5458     b_oth->free_ij = PETSC_TRUE;
5459     b_oth->nonew   = 0;
5460 
5461     ierr = PetscFree(bufj);CHKERRQ(ierr);
5462     if (!startsj_s || !bufa_ptr) {
5463       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5464       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5465     } else {
5466       *startsj_s = sstartsj;
5467       *startsj_r = rstartsj;
5468       *bufa_ptr  = bufa;
5469     }
5470   }
5471   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5472   PetscFunctionReturn(0);
5473 }
5474 
5475 /*@C
5476   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5477 
5478   Not Collective
5479 
5480   Input Parameters:
5481 . A - The matrix in mpiaij format
5482 
5483   Output Parameter:
5484 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5485 . colmap - A map from global column index to local index into lvec
5486 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5487 
5488   Level: developer
5489 
5490 @*/
5491 #if defined(PETSC_USE_CTABLE)
5492 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5493 #else
5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5495 #endif
5496 {
5497   Mat_MPIAIJ *a;
5498 
5499   PetscFunctionBegin;
5500   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5501   PetscValidPointer(lvec, 2);
5502   PetscValidPointer(colmap, 3);
5503   PetscValidPointer(multScatter, 4);
5504   a = (Mat_MPIAIJ*) A->data;
5505   if (lvec) *lvec = a->lvec;
5506   if (colmap) *colmap = a->colmap;
5507   if (multScatter) *multScatter = a->Mvctx;
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5513 #if defined(PETSC_HAVE_MKL_SPARSE)
5514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5515 #endif
5516 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5517 #if defined(PETSC_HAVE_ELEMENTAL)
5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5519 #endif
5520 #if defined(PETSC_HAVE_HYPRE)
5521 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5522 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5523 #endif
5524 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5526 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5527 
5528 /*
5529     Computes (B'*A')' since computing B*A directly is untenable
5530 
5531                n                       p                          p
5532         (              )       (              )         (                  )
5533       m (      A       )  *  n (       B      )   =   m (         C        )
5534         (              )       (              )         (                  )
5535 
5536 */
5537 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5538 {
5539   PetscErrorCode ierr;
5540   Mat            At,Bt,Ct;
5541 
5542   PetscFunctionBegin;
5543   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5544   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5545   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5546   ierr = MatDestroy(&At);CHKERRQ(ierr);
5547   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5548   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5549   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5554 {
5555   PetscErrorCode ierr;
5556   PetscInt       m=A->rmap->n,n=B->cmap->n;
5557   Mat            Cmat;
5558 
5559   PetscFunctionBegin;
5560   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5561   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5562   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5563   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5564   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5565   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5566   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5567   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5568 
5569   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5570 
5571   *C = Cmat;
5572   PetscFunctionReturn(0);
5573 }
5574 
5575 /* ----------------------------------------------------------------*/
/* Driver for C = A*B with A dense and B AIJ: symbolic phase only on the first
   call (MAT_INITIAL_MATRIX), numeric phase on every call. */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Allocate and configure the dense product matrix C */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  /* Fill (or refill, on reuse) the values of C */
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5591 
5592 /*MC
5593    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5594 
5595    Options Database Keys:
5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5597 
5598   Level: beginner
5599 
5600 .seealso: MatCreateAIJ()
5601 M*/
5602 
/* Constructor for the MATMPIAIJ type: allocates the implementation struct,
   installs the function table, and registers type-specific operations and
   conversions with the PETSc object system. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  /* Install the MPIAIJ function table (MatOps_Values defined earlier in this file) */
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific operations reachable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* Conversions to sibling formats; the optional ones are gated on build configuration */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  /* Mixed-format matrix-matrix product kernels */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5673 
5674 /*@C
5675      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5676          and "off-diagonal" part of the matrix in CSR format.
5677 
5678    Collective on MPI_Comm
5679 
5680    Input Parameters:
5681 +  comm - MPI communicator
5682 .  m - number of local rows (Cannot be PETSC_DECIDE)
5683 .  n - This value should be the same as the local size used in creating the
5684        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5685        calculated if N is given) For square matrices n is almost always m.
5686 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5687 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5688 .   i - row indices for "diagonal" portion of matrix
5689 .   j - column indices
5690 .   a - matrix values
5691 .   oi - row indices for "off-diagonal" portion of matrix
5692 .   oj - column indices
5693 -   oa - matrix values
5694 
5695    Output Parameter:
5696 .   mat - the matrix
5697 
5698    Level: advanced
5699 
5700    Notes:
5701        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5702        must free the arrays once the matrix has been destroyed and not before.
5703 
5704        The i and j indices are 0 based
5705 
5706        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5707 
5708        This sets local rows and cannot be used to set off-processor values.
5709 
5710        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5711        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5712        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5713        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5714        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5715        communication if it is known that only local entries will be set.
5716 
5717 .keywords: matrix, aij, compressed row, sparse, parallel
5718 
5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5720           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5721 @*/
/*
   Adopts the caller's CSR arrays directly (no copy) as the "diagonal" (A) and
   "off-diagonal" (B) sequential blocks of a new MPIAIJ matrix.  The caller
   retains ownership of i,j,a,oi,oj,oa and must keep them alive until the
   matrix is destroyed (see the man page above).
*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* m must be the concrete local row count: both sequential blocks are sized with it below */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* mark preallocated by hand: the normal MatMPIAIJSetPreallocation() path is bypassed here */
  (*mat)->preallocated = PETSC_TRUE;

  /* finalize the row/column layouts so (*mat)->cmap->N read below is valid */
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the user arrays: A is the m x n local diagonal block; B is created m x N
     because oj holds GLOBAL column indices (presumably remapped/compacted during
     the MPIAIJ assembly below -- behavior of MatAssemblyEnd, not visible here) */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* every entry is local by construction, so suppress stash communication for
     the parallel assembly, then restore the default option afterwards */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  /* new nonzero locations cannot be accommodated in the adopted arrays: make them an error */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5756 
5757 /*
5758     Special version for direct calls from Fortran
5759 */
5760 #include <petsc/private/fortranimpl.h>
5761 
/* Change these macros so they can be used in a void function */
5763 #undef CHKERRQ
5764 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5765 #undef SETERRQ2
5766 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5767 #undef SETERRQ3
5768 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5769 #undef SETERRQ
5770 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5771 
5772 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5773 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5774 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5775 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5776 #else
5777 #endif
5778 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5779 {
5780   Mat            mat  = *mmat;
5781   PetscInt       m    = *mm, n = *mn;
5782   InsertMode     addv = *maddv;
5783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5784   PetscScalar    value;
5785   PetscErrorCode ierr;
5786 
5787   MatCheckPreallocated(mat,1);
5788   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5789 
5790 #if defined(PETSC_USE_DEBUG)
5791   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5792 #endif
5793   {
5794     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5795     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5796     PetscBool roworiented = aij->roworiented;
5797 
5798     /* Some Variables required in the macro */
5799     Mat        A                 = aij->A;
5800     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5801     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5802     MatScalar  *aa               = a->a;
5803     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5804     Mat        B                 = aij->B;
5805     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5806     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5807     MatScalar  *ba               = b->a;
5808 
5809     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5810     PetscInt  nonew = a->nonew;
5811     MatScalar *ap1,*ap2;
5812 
5813     PetscFunctionBegin;
5814     for (i=0; i<m; i++) {
5815       if (im[i] < 0) continue;
5816 #if defined(PETSC_USE_DEBUG)
5817       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5818 #endif
5819       if (im[i] >= rstart && im[i] < rend) {
5820         row      = im[i] - rstart;
5821         lastcol1 = -1;
5822         rp1      = aj + ai[row];
5823         ap1      = aa + ai[row];
5824         rmax1    = aimax[row];
5825         nrow1    = ailen[row];
5826         low1     = 0;
5827         high1    = nrow1;
5828         lastcol2 = -1;
5829         rp2      = bj + bi[row];
5830         ap2      = ba + bi[row];
5831         rmax2    = bimax[row];
5832         nrow2    = bilen[row];
5833         low2     = 0;
5834         high2    = nrow2;
5835 
5836         for (j=0; j<n; j++) {
5837           if (roworiented) value = v[i*n+j];
5838           else value = v[i+j*m];
5839           if (in[j] >= cstart && in[j] < cend) {
5840             col = in[j] - cstart;
5841             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5842             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5843           } else if (in[j] < 0) continue;
5844 #if defined(PETSC_USE_DEBUG)
5845           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5846           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5847 #endif
5848           else {
5849             if (mat->was_assembled) {
5850               if (!aij->colmap) {
5851                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5852               }
5853 #if defined(PETSC_USE_CTABLE)
5854               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5855               col--;
5856 #else
5857               col = aij->colmap[in[j]] - 1;
5858 #endif
5859               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5860               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5861                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5862                 col  =  in[j];
5863                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5864                 B     = aij->B;
5865                 b     = (Mat_SeqAIJ*)B->data;
5866                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5867                 rp2   = bj + bi[row];
5868                 ap2   = ba + bi[row];
5869                 rmax2 = bimax[row];
5870                 nrow2 = bilen[row];
5871                 low2  = 0;
5872                 high2 = nrow2;
5873                 bm    = aij->B->rmap->n;
5874                 ba    = b->a;
5875               }
5876             } else col = in[j];
5877             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5878           }
5879         }
5880       } else if (!aij->donotstash) {
5881         if (roworiented) {
5882           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5883         } else {
5884           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5885         }
5886       }
5887     }
5888   }
5889   PetscFunctionReturnVoid();
5890 }
5891