xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e0fe450639229424d8a6abf0a9561cf44a362601)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically
23    switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN

    Input Parameters:
      comm  - communicator over which the matrix is distributed; rank 0 is the sender
      gmat  - the sequential matrix; its type is checked (must be MATSEQAIJ) and its entries are read
              on rank 0 only (NOTE(review): MatGetBlockSizes() is still called on gmat on every rank,
              the result is then overwritten by the broadcast from rank 0 - confirm gmat is valid everywhere)
      m     - number of local rows, also used as the number of local columns, on this process
      reuse - MAT_INITIAL_MATRIX creates *inmat; any other value reuses the nonzero structure already
              stored in *inmat and only moves the numerical values over from rank 0

    Output Parameter:
      inmat - the distributed matrix; MatAssemblyBegin()/MatAssemblyEnd() have been called on it on return
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 verifies the type of gmat */
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* every rank ends up with the block sizes that rank 0 read from gmat */
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every rank's m into rowners[1..size]; the running sum below turns it into row offsets */
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    /* this rank owns the global rows [rstart,rend) */
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
      /* NOTE(review): row lengths are taken from gmata->ilen while the offsets used for the sends below
         come from gmata->i; this presumably assumes ilen[i] == i[i+1]-i[i] for gmat - confirm */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine number diagonal and off-diagonal counts */
      /* ld[i]    = number of entries of row i whose column is below rstart
         olens[i] = number of entries of row i whose column lies outside [rstart,rend) */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
      /* rank 0 inserts its own rows straight from the arrays of gmat (nothing to free later) */
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
      /* the receives below are posted in the same order in which rank 0 issues its sends on this tag:
         row lengths, entry count, column indices, numerical values */
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine number diagonal and off-diagonal counts */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
      /* NOTE(review): gmataa is declared MatScalar but is zeroed with sizeof(PetscScalar) - confirm both types have the same size */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
    /* dlens temporarily holds only the count inside [rstart,rend); the total is restored right after */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
    /* insert the local rows one at a time; cnt is the offset of the current row in gmataj/gmataa */
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
    /* only the non-root ranks allocated gmataa/gmataj */
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
    /* ld is kept inside the matrix; the reuse branch below reads it back */
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
      /* NOTE(review): this value of nz is overwritten below before it is read */
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
      /* gmataa is advanced while copying below, so the original pointer is saved for PetscFree() */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* The incoming values of a row are consumed as: ld[i] values for B, then the row of A, then the
       remaining values of the row of B. Because the rows of B are stored back to back, the tail of
       row i-1 and the head of row i are copied into ao with a single PetscMemcpy() in the loop below. */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
      /* first row: head of the B row, then the A row */
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
      /* (length of B row i-1) - ld[i-1] is the tail of row i-1; ld[i] is the head of row i */
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
    /* step back from the loop's end value so that i is the index of the last local row */
388     i--;
389     if (mat->rmap->n) {
      /* tail of the last B row */
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
406 has an order N integer array but is fast to acess.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
/*
   MatSetValues_SeqAIJ_A_Private - Puts one value at (row,col) of the sequential matrix A, using
   row state that the expanding function has cached in local variables.

   Arguments:
     row,col   - row and column used to address A
     value     - the value to insert or add
     addv      - ADD_VALUES accumulates into an existing entry, anything else overwrites it
     orow,ocol - row/column reported in the error message only

   Besides its arguments the macro reads and updates these names from the enclosing scope:
     rp1,ap1 (column indices / values of the current row), nrow1 (current row length), rmax1,
     low1,high1,lastcol1 (search window carried from one call to the next), t,_i,ii,N (scratch),
     nonew, ignorezeroentries, and A,a,am,aa,ai,aj,aimax,ailen.

   Steps:
     1. The search window is narrowed by bisection until at most 5 candidates remain, then scanned
        linearly; the scan stops at the first stored column larger than col, so it relies on the
        stored column indices of the row being in ascending order.
     2. If col is already stored, the value is added or overwritten and nothing else changes.
     3. Otherwise the insertion is skipped when value is zero, ignorezeroentries is set and the
        entry is not on the diagonal (row != col), or when nonew == 1; nonew == -1 raises an error.
     4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up
        by one slot, the new entry is stored, and a->nz and A->nonzerostate are incremented.
     5. ailen[row] is written back from nrow1 on every path that reaches the a_noinsert label.

   The macro defines the label a_noinsert, so it can be expanded only once per function.
*/
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
/*
   MatSetValues_SeqAIJ_B_Private - Puts one value at (row,col) of the sequential matrix B, using
   row state that the expanding function has cached in local variables.

   Arguments:
     row,col   - row and column used to address B
     value     - the value to insert or add
     addv      - ADD_VALUES accumulates into an existing entry, anything else overwrites it
     orow,ocol - row/column reported in the error message only

   Besides its arguments the macro reads and updates these names from the enclosing scope:
     rp2,ap2 (column indices / values of the current row), nrow2 (current row length), rmax2,
     low2,high2,lastcol2 (search window carried from one call to the next), t,_i,ii,N (scratch),
     nonew, ignorezeroentries, and B,b,bm,ba,bi,bj,bimax,bilen.

   The steps are a bisection down to at most 5 candidates followed by a linear scan (which relies on
   ascending stored column indices), an in-place update when col is found, and otherwise an
   insertion that shifts the later entries of the row up by one slot and increments b->nz and
   B->nonzerostate. The insertion is skipped when value is zero and ignorezeroentries is set (here
   without any exception for row == col), or when nonew == 1; nonew == -1 raises an error.
   bilen[row] is written back from nrow2 on every path that reaches the b_noinsert label.

   The macro defines the label b_noinsert, so it can be expanded only once per function.
*/
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values into an MPIAIJ matrix.

   Input Parameters:
     mat  - the matrix
     m,im - number of rows and their global indices; negative row indices are skipped
     n,in - number of columns and their global indices; negative column indices are skipped
     v    - the values, read as v[i*n+j] when aij->roworiented is set and as v[i+j*m] otherwise
     addv - ADD_VALUES accumulates into existing entries, anything else overwrites them

   Notes:
   Rows in [rstart,rend) are written directly: columns in [cstart,cend) go to the inner matrix
   aij->A through MatSetValues_SeqAIJ_A_Private(), all other columns go to the inner matrix aij->B
   through MatSetValues_SeqAIJ_B_Private(). Rows outside [rstart,rend) are put into mat->stash
   unless aij->donotstash is set, and raise an error when mat->nooffprocentries is set.

   The upper bounds on row and column indices are only checked when PETSC_USE_DEBUG is defined.
*/
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: load the per-row state (column/value pointers, capacity, current length and
         search window) of both inner matrices into the locals the two macros work on */
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to A; A is addressed with the column shifted by cstart */
576           col   = in[j] - cstart;
577           nonew = a->nonew;
          /* adding an off-diagonal zero is a no-op when zero entries are ignored */
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
            /* after a previous assembly the column of B is obtained through colmap, which stores
               (local column + 1); a global column that is not in the map therefore yields col == -1 */
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* unknown column while new nonzeros are allowed: disassemble and continue with the global
                 column index; aij->B and every cached value derived from it are re-read afterwards,
                 presumably because MatDisAssemble_MPIAIJ() rebuilds B - confirm */
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
              /* NOTE(review): ba was already refreshed a few lines above; this assignment repeats it */
609               ba    = b->a;
610             } else if (col < 0) {
              /* unknown column while new nonzeros are not allowed: nonew == 1 only logs (the macro
                 below then skips the insertion because nonew == 1), every other setting is an error */
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
          /* never assembled before: B is addressed with the global column index */
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
      /* row owned by another process */
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
        /* stash the whole row of values and mark the matrix as not assembled; zeros are dropped by
           the stash routine only when ignorezeroentries is set and values are being added */
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled, if so we must
818      also disassemble ourselfs, in order that we may reassemble. */
819   /*
820      if nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of off process part of matrix zeroing removed columns*/
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added in yy until the next line, */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscBool      lf;
1105   PetscMPIInt    size;
1106 
1107   PetscFunctionBegin;
1108   /* Easy test: symmetric diagonal block */
1109   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1110   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1111   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1112   if (!*f) PetscFunctionReturn(0);
1113   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1115   if (size == 1) PetscFunctionReturn(0);
1116 
1117   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1118   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1119   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1120   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1121   for (i=0; i<first; i++) notme[i] = i;
1122   for (i=last; i<M; i++) notme[i-last+first] = i;
1123   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1124   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1125   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1126   Aoff = Aoffs[0];
1127   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1128   Boff = Boffs[0];
1129   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1130   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1131   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1132   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1133   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1134   ierr = PetscFree(notme);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1139 {
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* send it on its way */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   /* do local part */
1158   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   /* receive remote parts */
1160   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165   This only works correctly for square matrices where the subblock A->A is the
1166    diagonal block
1167 */
1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1169 {
1170   PetscErrorCode ierr;
1171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1172 
1173   PetscFunctionBegin;
1174   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1175   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1176   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1181 {
1182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1187   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1192 {
1193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197 #if defined(PETSC_USE_LOG)
1198   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1199 #endif
1200   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1201   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1202   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1203   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1204 #if defined(PETSC_USE_CTABLE)
1205   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1206 #else
1207   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1208 #endif
1209   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1210   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1212   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1213   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1214   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1215   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_ELEMENTAL)
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1228 #endif
1229 #if defined(PETSC_HAVE_HYPRE)
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1232 #endif
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1239 {
1240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1242   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1243   PetscErrorCode ierr;
1244   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1245   int            fd;
1246   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1247   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1248   PetscScalar    *column_values;
1249   PetscInt       message_count,flowcontrolcount;
1250   FILE           *file;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1255   nz   = A->nz + B->nz;
1256   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257   if (!rank) {
1258     header[0] = MAT_FILE_CLASSID;
1259     header[1] = mat->rmap->N;
1260     header[2] = mat->cmap->N;
1261 
1262     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     /* get largest number of rows any processor has */
1265     rlen  = mat->rmap->n;
1266     range = mat->rmap->range;
1267     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1268   } else {
1269     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     rlen = mat->rmap->n;
1271   }
1272 
1273   /* load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
1296   nzmax = nz; /* th processor needs space a largest processor needs */
1297   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt   = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt  = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for (j=B->i[i]; j<B->i[i+1]; j++) {
1336       if (garray[B->j[j]] > cstart) break;
1337       column_values[cnt++] = B->a[j];
1338     }
1339     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1340     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1341   }
1342   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1343 
1344   /* store the column values to the file */
1345   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1346   if (!rank) {
1347     MPI_Status status;
1348     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1349     for (i=1; i<size; i++) {
1350       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1351       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1352       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1353       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1360     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(column_values);CHKERRQ(ierr);
1364 
1365   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1366   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #include <petscdraw.h>
1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1372 {
1373   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1374   PetscErrorCode    ierr;
1375   PetscMPIInt       rank = aij->rank,size = aij->size;
1376   PetscBool         isdraw,iascii,isbinary;
1377   PetscViewer       sviewer;
1378   PetscViewerFormat format;
1379 
1380   PetscFunctionBegin;
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   if (iascii) {
1385     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1386     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1387       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1388       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1389       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       for (i=0; i<(PetscInt)size; i++) {
1391         nmax = PetscMax(nmax,nz[i]);
1392         nmin = PetscMin(nmin,nz[i]);
1393         navg += nz[i];
1394       }
1395       ierr = PetscFree(nz);CHKERRQ(ierr);
1396       navg = navg/size;
1397       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     }
1400     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1401     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1402       MatInfo   info;
1403       PetscBool inodes;
1404 
1405       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1406       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1407       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1409       if (!inodes) {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       } else {
1413         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1414                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1415       }
1416       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1423       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1426       PetscInt inodecount,inodelimit,*inodes;
1427       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1428       if (inodes) {
1429         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1430       } else {
1431         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1432       }
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1435       PetscFunctionReturn(0);
1436     }
1437   } else if (isbinary) {
1438     if (size == 1) {
1439       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1440       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1443     }
1444     PetscFunctionReturn(0);
1445   } else if (isdraw) {
1446     PetscDraw draw;
1447     PetscBool isnull;
1448     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1449     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1450     if (isnull) PetscFunctionReturn(0);
1451   }
1452 
1453   {
1454     /* assemble the entire matrix onto first processor. */
1455     Mat        A;
1456     Mat_SeqAIJ *Aloc;
1457     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1458     MatScalar  *a;
1459 
1460     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1461     if (!rank) {
1462       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1465     }
1466     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1467     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1468     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1469     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1470     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1471 
1472     /* copy over the A part */
1473     Aloc = (Mat_SeqAIJ*)aij->A->data;
1474     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1479       row++;
1480       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1481     }
1482     aj = Aloc->j;
1483     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1484 
1485     /* copy over the B part */
1486     Aloc = (Mat_SeqAIJ*)aij->B->data;
1487     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1488     row  = mat->rmap->rstart;
1489     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1490     ct   = cols;
1491     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1492     for (i=0; i<m; i++) {
1493       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1494       row++;
1495       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1496     }
1497     ierr = PetscFree(ct);CHKERRQ(ierr);
1498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500     /*
1501        Everyone has to call to draw the matrix since the graphics waits are
1502        synchronized across all processors that share the PetscDraw object
1503     */
1504     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     if (!rank) {
1506       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1508     }
1509     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1510     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1517 {
1518   PetscErrorCode ierr;
1519   PetscBool      iascii,isdraw,issocket,isbinary;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1526   if (iascii || isdraw || isbinary || issocket) {
1527     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1533 {
1534   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1535   PetscErrorCode ierr;
1536   Vec            bb1 = 0;
1537   PetscBool      hasop;
1538 
1539   PetscFunctionBegin;
1540   if (flag == SOR_APPLY_UPPER) {
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542     PetscFunctionReturn(0);
1543   }
1544 
1545   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1547   }
1548 
1549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554 
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_EISENSTAT) {
1599     Vec xx1;
1600 
1601     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1603 
1604     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     if (!mat->diag) {
1607       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1608       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1609     }
1610     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1611     if (hasop) {
1612       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1613     } else {
1614       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1615     }
1616     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1617 
1618     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1619 
1620     /* local sweep */
1621     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1622     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1623     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1624   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1625 
1626   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1627 
1628   matin->factorerrortype = mat->A->factorerrortype;
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1633 {
1634   Mat            aA,aB,Aperm;
1635   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1636   PetscScalar    *aa,*ba;
1637   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1638   PetscSF        rowsf,sf;
1639   IS             parcolp = NULL;
1640   PetscBool      done;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1645   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1648 
1649   /* Invert row permutation to find out where my rows should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1654   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656 
1657   /* Invert column permutation to find out where my columns should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1661   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1662   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1665 
1666   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1667   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1668   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1669 
1670   /* Find out where my gcols should go */
1671   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1672   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1681   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     PetscInt row = rdest[i],rowner;
1685     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1686     for (j=ai[i]; j<ai[i+1]; j++) {
1687       PetscInt cowner,col = cdest[aj[j]];
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692     for (j=bi[i]; j<bi[i+1]; j++) {
1693       PetscInt cowner,col = gcdest[bj[j]];
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698   }
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1704 
1705   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1706   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1708   for (i=0; i<m; i++) {
1709     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1710     PetscInt j0,rowlen;
1711     rowlen = ai[i+1] - ai[i];
1712     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716     rowlen = bi[i+1] - bi[i];
1717     for (j0=j=0; j<rowlen; j0=j) {
1718       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1719       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1720     }
1721   }
1722   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1726   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1728   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1729   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1730   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1731   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1732   *B = Aperm;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1737 {
1738   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1743   if (ghosts) *ghosts = aij->garray;
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A    = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size = 1.0;
1756   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1759   isend[3] = info->memory;  isend[4] = info->mallocs;
1760 
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762 
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   }
1788   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1789   info->fill_ratio_needed = 0;
1790   info->factor_mallocs    = 0;
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     MatCheckPreallocated(A,1);
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     MatCheckPreallocated(A,1);
1814     a->roworiented = flg;
1815 
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1826   case MAT_SPD:
1827   case MAT_SYMMETRIC:
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829   case MAT_HERMITIAN:
1830   case MAT_SYMMETRY_ETERNAL:
1831     break;
1832   case MAT_SUBMAT_SINGLEIS:
1833     A->submat_singleis = flg;
1834     break;
1835   case MAT_STRUCTURE_ONLY:
1836     /* The option is handled directly by MatSetOption() */
1837     break;
1838   case MAT_REUSE:
1839     /* The option is handled directly by MatSetOption() */
1840     break;
1841   default:
1842     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1848 {
1849   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1850   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1851   PetscErrorCode ierr;
1852   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1853   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1854   PetscInt       *cmap,*idx_p;
1855 
1856   PetscFunctionBegin;
1857   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1858   mat->getrowactive = PETSC_TRUE;
1859 
1860   if (!mat->rowvalues && (idx || v)) {
1861     /*
1862         allocate enough space to hold information from the longest row.
1863     */
1864     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1865     PetscInt   max = 1,tmp;
1866     for (i=0; i<matin->rmap->n; i++) {
1867       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1868       if (max < tmp) max = tmp;
1869     }
1870     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1871   }
1872 
1873   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1874   lrow = row - rstart;
1875 
1876   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1877   if (!v)   {pvA = 0; pvB = 0;}
1878   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1879   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1880   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1881   nztot = nzA + nzB;
1882 
1883   cmap = mat->garray;
1884   if (v  || idx) {
1885     if (nztot) {
1886       /* Sort by increasing column numbers, assuming A and B already sorted */
1887       PetscInt imark = -1;
1888       if (v) {
1889         *v = v_p = mat->rowvalues;
1890         for (i=0; i<nzB; i++) {
1891           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1892           else break;
1893         }
1894         imark = i;
1895         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1896         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1897       }
1898       if (idx) {
1899         *idx = idx_p = mat->rowindices;
1900         if (imark > -1) {
1901           for (i=0; i<imark; i++) {
1902             idx_p[i] = cmap[cworkB[i]];
1903           }
1904         } else {
1905           for (i=0; i<nzB; i++) {
1906             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1907             else break;
1908           }
1909           imark = i;
1910         }
1911         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1912         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1913       }
1914     } else {
1915       if (idx) *idx = 0;
1916       if (v)   *v   = 0;
1917     }
1918   }
1919   *nz  = nztot;
1920   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1921   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1926 {
1927   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1928 
1929   PetscFunctionBegin;
1930   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1931   aij->getrowactive = PETSC_FALSE;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1936 {
1937   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1938   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1939   PetscErrorCode ierr;
1940   PetscInt       i,j,cstart = mat->cmap->rstart;
1941   PetscReal      sum = 0.0;
1942   MatScalar      *v;
1943 
1944   PetscFunctionBegin;
1945   if (aij->size == 1) {
1946     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1947   } else {
1948     if (type == NORM_FROBENIUS) {
1949       v = amat->a;
1950       for (i=0; i<amat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       v = bmat->a;
1954       for (i=0; i<bmat->nz; i++) {
1955         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1956       }
1957       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1958       *norm = PetscSqrtReal(*norm);
1959       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1960     } else if (type == NORM_1) { /* max column norm */
1961       PetscReal *tmp,*tmp2;
1962       PetscInt  *jj,*garray = aij->garray;
1963       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1964       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1965       *norm = 0.0;
1966       v     = amat->a; jj = amat->j;
1967       for (j=0; j<amat->nz; j++) {
1968         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1969       }
1970       v = bmat->a; jj = bmat->j;
1971       for (j=0; j<bmat->nz; j++) {
1972         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1973       }
1974       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1975       for (j=0; j<mat->cmap->N; j++) {
1976         if (tmp2[j] > *norm) *norm = tmp2[j];
1977       }
1978       ierr = PetscFree(tmp);CHKERRQ(ierr);
1979       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1980       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1981     } else if (type == NORM_INFINITY) { /* max row norm */
1982       PetscReal ntemp = 0.0;
1983       for (j=0; j<aij->A->rmap->n; j++) {
1984         v   = amat->a + amat->i[j];
1985         sum = 0.0;
1986         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         v = bmat->a + bmat->i[j];
1990         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1991           sum += PetscAbsScalar(*v); v++;
1992         }
1993         if (sum > ntemp) ntemp = sum;
1994       }
1995       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1996       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1997     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1998   }
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2003 {
2004   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2005   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2006   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2007   PetscErrorCode ierr;
2008   Mat            B,A_diag,*B_diag;
2009   MatScalar      *array;
2010 
2011   PetscFunctionBegin;
2012   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2013   ai = Aloc->i; aj = Aloc->j;
2014   bi = Bloc->i; bj = Bloc->j;
2015   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2016     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2017     PetscSFNode          *oloc;
2018     PETSC_UNUSED PetscSF sf;
2019 
2020     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2021     /* compute d_nnz for preallocation */
2022     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2023     for (i=0; i<ai[ma]; i++) {
2024       d_nnz[aj[i]]++;
2025     }
2026     /* compute local off-diagonal contributions */
2027     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2028     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2029     /* map those to global */
2030     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2031     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2032     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2033     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2034     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2035     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2036     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2037 
2038     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2039     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2040     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2041     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2042     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2043     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2044   } else {
2045     B    = *matout;
2046     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2047   }
2048 
2049   b           = (Mat_MPIAIJ*)B->data;
2050   A_diag      = a->A;
2051   B_diag      = &b->A;
2052   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2053   A_diag_ncol = A_diag->cmap->N;
2054   B_diag_ilen = sub_B_diag->ilen;
2055   B_diag_i    = sub_B_diag->i;
2056 
2057   /* Set ilen for diagonal of B */
2058   for (i=0; i<A_diag_ncol; i++) {
2059     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2060   }
2061 
2062   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2063   very quickly (=without using MatSetValues), because all writes are local. */
2064   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2065 
2066   /* copy over the B part */
2067   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2068   array = Bloc->a;
2069   row   = A->rmap->rstart;
2070   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2071   cols_tmp = cols;
2072   for (i=0; i<mb; i++) {
2073     ncol = bi[i+1]-bi[i];
2074     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2075     row++;
2076     array += ncol; cols_tmp += ncol;
2077   }
2078   ierr = PetscFree(cols);CHKERRQ(ierr);
2079 
2080   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2083     *matout = B;
2084   } else {
2085     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2091 {
2092   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2093   Mat            a    = aij->A,b = aij->B;
2094   PetscErrorCode ierr;
2095   PetscInt       s1,s2,s3;
2096 
2097   PetscFunctionBegin;
2098   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2099   if (rr) {
2100     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2101     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2102     /* Overlap communication with computation. */
2103     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   }
2105   if (ll) {
2106     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2107     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2108     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2109   }
2110   /* scale  the diagonal block */
2111   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2112 
2113   if (rr) {
2114     /* Do a scatter end and then right scale the off-diagonal block */
2115     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2116     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2122 {
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   PetscErrorCode ierr;
2125 
2126   PetscFunctionBegin;
2127   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2132 {
2133   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2134   Mat            a,b,c,d;
2135   PetscBool      flg;
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   a = matA->A; b = matA->B;
2140   c = matB->A; d = matB->B;
2141 
2142   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2143   if (flg) {
2144     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2145   }
2146   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2151 {
2152   PetscErrorCode ierr;
2153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2154   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2155 
2156   PetscFunctionBegin;
2157   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2158   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2159     /* because of the column compression in the off-processor part of the matrix a->B,
2160        the number of columns in a->B and b->B may be different, hence we cannot call
2161        the MatCopy() directly on the two parts. If need be, we can provide a more
2162        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2163        then copying the submatrices */
2164     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2165   } else {
2166     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2167     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2168   }
2169   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2174 {
2175   PetscErrorCode ierr;
2176 
2177   PetscFunctionBegin;
2178   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 /*
2183    Computes the number of nonzeros per row needed for preallocation when X and Y
2184    have different nonzero structure.
2185 */
2186 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2187 {
2188   PetscInt       i,j,k,nzx,nzy;
2189 
2190   PetscFunctionBegin;
2191   /* Set the number of nonzeros in the new matrix */
2192   for (i=0; i<m; i++) {
2193     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2194     nzx = xi[i+1] - xi[i];
2195     nzy = yi[i+1] - yi[i];
2196     nnz[i] = 0;
2197     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2198       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2199       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2200       nnz[i]++;
2201     }
2202     for (; k<nzy; k++) nnz[i]++;
2203   }
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2208 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2209 {
2210   PetscErrorCode ierr;
2211   PetscInt       m = Y->rmap->N;
2212   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2213   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2221 {
2222   PetscErrorCode ierr;
2223   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2224   PetscBLASInt   bnz,one=1;
2225   Mat_SeqAIJ     *x,*y;
2226 
2227   PetscFunctionBegin;
2228   if (str == SAME_NONZERO_PATTERN) {
2229     PetscScalar alpha = a;
2230     x    = (Mat_SeqAIJ*)xx->A->data;
2231     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2232     y    = (Mat_SeqAIJ*)yy->A->data;
2233     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2234     x    = (Mat_SeqAIJ*)xx->B->data;
2235     y    = (Mat_SeqAIJ*)yy->B->data;
2236     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2237     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2238     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2239   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2240     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2241   } else {
2242     Mat      B;
2243     PetscInt *nnz_d,*nnz_o;
2244     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2245     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2246     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2247     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2248     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2249     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2250     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2254     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2255     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2263 
2264 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2265 {
2266 #if defined(PETSC_USE_COMPLEX)
2267   PetscErrorCode ierr;
2268   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2272   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2273 #else
2274   PetscFunctionBegin;
2275 #endif
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2280 {
2281   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2282   PetscErrorCode ierr;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2286   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2291 {
2292   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2297   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2302 {
2303   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2304   PetscErrorCode ierr;
2305   PetscInt       i,*idxb = 0;
2306   PetscScalar    *va,*vb;
2307   Vec            vtmp;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2311   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2312   if (idx) {
2313     for (i=0; i<A->rmap->n; i++) {
2314       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2315     }
2316   }
2317 
2318   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2319   if (idx) {
2320     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2321   }
2322   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2323   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2324 
2325   for (i=0; i<A->rmap->n; i++) {
2326     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2327       va[i] = vb[i];
2328       if (idx) idx[i] = a->garray[idxb[i]];
2329     }
2330   }
2331 
2332   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2333   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2334   ierr = PetscFree(idxb);CHKERRQ(ierr);
2335   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2340 {
2341   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2342   PetscErrorCode ierr;
2343   PetscInt       i,*idxb = 0;
2344   PetscScalar    *va,*vb;
2345   Vec            vtmp;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2349   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2350   if (idx) {
2351     for (i=0; i<A->cmap->n; i++) {
2352       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2353     }
2354   }
2355 
2356   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2357   if (idx) {
2358     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2359   }
2360   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2361   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2362 
2363   for (i=0; i<A->rmap->n; i++) {
2364     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2365       va[i] = vb[i];
2366       if (idx) idx[i] = a->garray[idxb[i]];
2367     }
2368   }
2369 
2370   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2372   ierr = PetscFree(idxb);CHKERRQ(ierr);
2373   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2378 {
2379   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2380   PetscInt       n      = A->rmap->n;
2381   PetscInt       cstart = A->cmap->rstart;
2382   PetscInt       *cmap  = mat->garray;
2383   PetscInt       *diagIdx, *offdiagIdx;
2384   Vec            diagV, offdiagV;
2385   PetscScalar    *a, *diagA, *offdiagA;
2386   PetscInt       r;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2391   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2392   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2393   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2394   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2395   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2396   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2397   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2398   for (r = 0; r < n; ++r) {
2399     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2400       a[r]   = diagA[r];
2401       idx[r] = cstart + diagIdx[r];
2402     } else {
2403       a[r]   = offdiagA[r];
2404       idx[r] = cmap[offdiagIdx[r]];
2405     }
2406   }
2407   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2408   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2409   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2410   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2411   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2412   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2417 {
2418   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2419   PetscInt       n      = A->rmap->n;
2420   PetscInt       cstart = A->cmap->rstart;
2421   PetscInt       *cmap  = mat->garray;
2422   PetscInt       *diagIdx, *offdiagIdx;
2423   Vec            diagV, offdiagV;
2424   PetscScalar    *a, *diagA, *offdiagA;
2425   PetscInt       r;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2430   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2432   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2434   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2435   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2436   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2437   for (r = 0; r < n; ++r) {
2438     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2439       a[r]   = diagA[r];
2440       idx[r] = cstart + diagIdx[r];
2441     } else {
2442       a[r]   = offdiagA[r];
2443       idx[r] = cmap[offdiagIdx[r]];
2444     }
2445   }
2446   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2447   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2449   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2450   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2451   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2456 {
2457   PetscErrorCode ierr;
2458   Mat            *dummy;
2459 
2460   PetscFunctionBegin;
2461   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2462   *newmat = *dummy;
2463   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2468 {
2469   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2470   PetscErrorCode ierr;
2471 
2472   PetscFunctionBegin;
2473   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2474   A->factorerrortype = a->A->factorerrortype;
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2479 {
2480   PetscErrorCode ierr;
2481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2482 
2483   PetscFunctionBegin;
2484   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2485   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2486   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2487   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2492 {
2493   PetscFunctionBegin;
2494   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2495   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 /*@
2500    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2501 
2502    Collective on Mat
2503 
2504    Input Parameters:
2505 +    A - the matrix
2506 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2507 
2508  Level: advanced
2509 
2510 @*/
2511 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2512 {
2513   PetscErrorCode       ierr;
2514 
2515   PetscFunctionBegin;
2516   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2521 {
2522   PetscErrorCode       ierr;
2523   PetscBool            sc = PETSC_FALSE,flg;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2527   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2528   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2529   if (flg) {
2530     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2531   }
2532   ierr = PetscOptionsTail();CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2537 {
2538   PetscErrorCode ierr;
2539   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2540   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2541 
2542   PetscFunctionBegin;
2543   if (!Y->preallocated) {
2544     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2545   } else if (!aij->nz) {
2546     PetscInt nonew = aij->nonew;
2547     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2548     aij->nonew = nonew;
2549   }
2550   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2555 {
2556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2557   PetscErrorCode ierr;
2558 
2559   PetscFunctionBegin;
2560   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2561   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2562   if (d) {
2563     PetscInt rstart;
2564     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2565     *d += rstart;
2566 
2567   }
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2572 {
2573   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2574   PetscErrorCode ierr;
2575 
2576   PetscFunctionBegin;
2577   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 /* -------------------------------------------------------------------*/
2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2583                                        MatGetRow_MPIAIJ,
2584                                        MatRestoreRow_MPIAIJ,
2585                                        MatMult_MPIAIJ,
2586                                 /* 4*/ MatMultAdd_MPIAIJ,
2587                                        MatMultTranspose_MPIAIJ,
2588                                        MatMultTransposeAdd_MPIAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*10*/ 0,
2593                                        0,
2594                                        0,
2595                                        MatSOR_MPIAIJ,
2596                                        MatTranspose_MPIAIJ,
2597                                 /*15*/ MatGetInfo_MPIAIJ,
2598                                        MatEqual_MPIAIJ,
2599                                        MatGetDiagonal_MPIAIJ,
2600                                        MatDiagonalScale_MPIAIJ,
2601                                        MatNorm_MPIAIJ,
2602                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2603                                        MatAssemblyEnd_MPIAIJ,
2604                                        MatSetOption_MPIAIJ,
2605                                        MatZeroEntries_MPIAIJ,
2606                                 /*24*/ MatZeroRows_MPIAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                 /*29*/ MatSetUp_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                        MatGetDiagonalBlock_MPIAIJ,
2615                                        0,
2616                                 /*34*/ MatDuplicate_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*39*/ MatAXPY_MPIAIJ,
2622                                        MatCreateSubMatrices_MPIAIJ,
2623                                        MatIncreaseOverlap_MPIAIJ,
2624                                        MatGetValues_MPIAIJ,
2625                                        MatCopy_MPIAIJ,
2626                                 /*44*/ MatGetRowMax_MPIAIJ,
2627                                        MatScale_MPIAIJ,
2628                                        MatShift_MPIAIJ,
2629                                        MatDiagonalSet_MPIAIJ,
2630                                        MatZeroRowsColumns_MPIAIJ,
2631                                 /*49*/ MatSetRandom_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2637                                        0,
2638                                        MatSetUnfactored_MPIAIJ,
2639                                        MatPermute_MPIAIJ,
2640                                        0,
2641                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2642                                        MatDestroy_MPIAIJ,
2643                                        MatView_MPIAIJ,
2644                                        0,
2645                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2652                                        MatGetRowMinAbs_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*75*/ MatFDColoringApply_AIJ,
2658                                        MatSetFromOptions_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        MatFindZeroDiagonals_MPIAIJ,
2662                                 /*80*/ 0,
2663                                        0,
2664                                        0,
2665                                 /*83*/ MatLoad_MPIAIJ,
2666                                        MatIsSymmetric_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2672                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2673                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2674                                        MatPtAP_MPIAIJ_MPIAIJ,
2675                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2676                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*99*/ 0,
2682                                        0,
2683                                        0,
2684                                        MatConjugate_MPIAIJ,
2685                                        0,
2686                                 /*104*/MatSetValuesRow_MPIAIJ,
2687                                        MatRealPart_MPIAIJ,
2688                                        MatImaginaryPart_MPIAIJ,
2689                                        0,
2690                                        0,
2691                                 /*109*/0,
2692                                        0,
2693                                        MatGetRowMin_MPIAIJ,
2694                                        0,
2695                                        MatMissingDiagonal_MPIAIJ,
2696                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2697                                        0,
2698                                        MatGetGhosts_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                 /*119*/0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        MatGetMultiProcBlock_MPIAIJ,
2706                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2707                                        MatGetColumnNorms_MPIAIJ,
2708                                        MatInvertBlockDiagonal_MPIAIJ,
2709                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2710                                        MatCreateSubMatricesMPI_MPIAIJ,
2711                                 /*129*/0,
2712                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2713                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2714                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2715                                        0,
2716                                 /*134*/0,
2717                                        0,
2718                                        MatRARt_MPIAIJ_MPIAIJ,
2719                                        0,
2720                                        0,
2721                                 /*139*/MatSetBlockSizes_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                        MatFDColoringSetUp_MPIXAIJ,
2725                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2726                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2727 };
2728 
2729 /* ----------------------------------------------------------------------------------------*/
2730 
2731 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2732 {
2733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2734   PetscErrorCode ierr;
2735 
2736   PetscFunctionBegin;
2737   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2738   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2743 {
2744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2749   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2754 {
2755   Mat_MPIAIJ     *b;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2760   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2761   b = (Mat_MPIAIJ*)B->data;
2762 
2763 #if defined(PETSC_USE_CTABLE)
2764   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2765 #else
2766   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2767 #endif
2768   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2769   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2770   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2771 
2772   /* Because the B will have been resized we simply destroy it and create a new one each time */
2773   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2774   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2775   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2776   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2777   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2778   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2779 
2780   if (!B->preallocated) {
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2783     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2784     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2785     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2786   }
2787 
2788   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2789   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2790   B->preallocated  = PETSC_TRUE;
2791   B->was_assembled = PETSC_FALSE;
2792   B->assembled     = PETSC_FALSE;;
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2797 {
2798   Mat_MPIAIJ     *b;
2799   PetscErrorCode ierr;
2800 
2801   PetscFunctionBegin;
2802   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2803   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2805   b = (Mat_MPIAIJ*)B->data;
2806 
2807 #if defined(PETSC_USE_CTABLE)
2808   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2809 #else
2810   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2811 #endif
2812   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2813   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2814   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2815 
2816   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2817   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2818   B->preallocated  = PETSC_TRUE;
2819   B->was_assembled = PETSC_FALSE;
2820   B->assembled = PETSC_FALSE;
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2825 {
2826   Mat            mat;
2827   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2828   PetscErrorCode ierr;
2829 
2830   PetscFunctionBegin;
2831   *newmat = 0;
2832   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2833   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2834   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2835   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2836   a       = (Mat_MPIAIJ*)mat->data;
2837 
2838   mat->factortype   = matin->factortype;
2839   mat->assembled    = PETSC_TRUE;
2840   mat->insertmode   = NOT_SET_VALUES;
2841   mat->preallocated = PETSC_TRUE;
2842 
2843   a->size         = oldmat->size;
2844   a->rank         = oldmat->rank;
2845   a->donotstash   = oldmat->donotstash;
2846   a->roworiented  = oldmat->roworiented;
2847   a->rowindices   = 0;
2848   a->rowvalues    = 0;
2849   a->getrowactive = PETSC_FALSE;
2850 
2851   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2852   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2853 
2854   if (oldmat->colmap) {
2855 #if defined(PETSC_USE_CTABLE)
2856     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2857 #else
2858     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2859     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2860     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2861 #endif
2862   } else a->colmap = 0;
2863   if (oldmat->garray) {
2864     PetscInt len;
2865     len  = oldmat->B->cmap->n;
2866     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2867     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2868     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2869   } else a->garray = 0;
2870 
2871   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2872   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2873   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2874   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2875 
2876   if (oldmat->Mvctx_mpi1) {
2877     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2878     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2879   }
2880 
2881   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2883   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2884   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2885   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2886   *newmat = mat;
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2891 {
2892   PetscBool      isbinary, ishdf5;
2893   PetscErrorCode ierr;
2894 
2895   PetscFunctionBegin;
2896   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2897   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2898   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2899   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2900   if (isbinary) {
2901     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2902   } else if (ishdf5) {
2903 #if defined(PETSC_HAVE_HDF5)
2904     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2905 #else
2906     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2907 #endif
2908   } else {
2909     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2910   }
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2915 {
2916   PetscScalar    *vals,*svals;
2917   MPI_Comm       comm;
2918   PetscErrorCode ierr;
2919   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2920   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2921   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2922   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2923   PetscInt       cend,cstart,n,*rowners;
2924   int            fd;
2925   PetscInt       bs = newMat->rmap->bs;
2926 
2927   PetscFunctionBegin;
2928   /* force binary viewer to load .info file if it has not yet done so */
2929   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2930   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2931   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2932   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2933   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2934   if (!rank) {
2935     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2936     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2937     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2938   }
2939 
2940   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2941   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2942   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2943   if (bs < 0) bs = 1;
2944 
2945   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2946   M    = header[1]; N = header[2];
2947 
2948   /* If global sizes are set, check if they are consistent with that given in the file */
2949   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2950   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2951 
2952   /* determine ownership of all (block) rows */
2953   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2954   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2955   else m = newMat->rmap->n; /* Set by user */
2956 
2957   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2958   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2959 
2960   /* First process needs enough room for process with most rows */
2961   if (!rank) {
2962     mmax = rowners[1];
2963     for (i=2; i<=size; i++) {
2964       mmax = PetscMax(mmax, rowners[i]);
2965     }
2966   } else mmax = -1;             /* unused, but compilers complain */
2967 
2968   rowners[0] = 0;
2969   for (i=2; i<=size; i++) {
2970     rowners[i] += rowners[i-1];
2971   }
2972   rstart = rowners[rank];
2973   rend   = rowners[rank+1];
2974 
2975   /* distribute row lengths to all processors */
2976   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2977   if (!rank) {
2978     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2979     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2980     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2981     for (j=0; j<m; j++) {
2982       procsnz[0] += ourlens[j];
2983     }
2984     for (i=1; i<size; i++) {
2985       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2986       /* calculate the number of nonzeros on each processor */
2987       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2988         procsnz[i] += rowlengths[j];
2989       }
2990       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2991     }
2992     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2993   } else {
2994     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2995   }
2996 
2997   if (!rank) {
2998     /* determine max buffer needed and allocate it */
2999     maxnz = 0;
3000     for (i=0; i<size; i++) {
3001       maxnz = PetscMax(maxnz,procsnz[i]);
3002     }
3003     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3004 
3005     /* read in my part of the matrix column indices  */
3006     nz   = procsnz[0];
3007     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3008     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3009 
3010     /* read in every one elses and ship off */
3011     for (i=1; i<size; i++) {
3012       nz   = procsnz[i];
3013       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3014       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3015     }
3016     ierr = PetscFree(cols);CHKERRQ(ierr);
3017   } else {
3018     /* determine buffer space needed for message */
3019     nz = 0;
3020     for (i=0; i<m; i++) {
3021       nz += ourlens[i];
3022     }
3023     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3024 
3025     /* receive message of column indices*/
3026     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3027   }
3028 
3029   /* determine column ownership if matrix is not square */
3030   if (N != M) {
3031     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3032     else n = newMat->cmap->n;
3033     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3034     cstart = cend - n;
3035   } else {
3036     cstart = rstart;
3037     cend   = rend;
3038     n      = cend - cstart;
3039   }
3040 
3041   /* loop over local rows, determining number of off diagonal entries */
3042   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3043   jj   = 0;
3044   for (i=0; i<m; i++) {
3045     for (j=0; j<ourlens[i]; j++) {
3046       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3047       jj++;
3048     }
3049   }
3050 
3051   for (i=0; i<m; i++) {
3052     ourlens[i] -= offlens[i];
3053   }
3054   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3055 
3056   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3057 
3058   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3059 
3060   for (i=0; i<m; i++) {
3061     ourlens[i] += offlens[i];
3062   }
3063 
3064   if (!rank) {
3065     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3066 
3067     /* read in my part of the matrix numerical values  */
3068     nz   = procsnz[0];
3069     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3070 
3071     /* insert into matrix */
3072     jj      = rstart;
3073     smycols = mycols;
3074     svals   = vals;
3075     for (i=0; i<m; i++) {
3076       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3077       smycols += ourlens[i];
3078       svals   += ourlens[i];
3079       jj++;
3080     }
3081 
3082     /* read in other processors and ship out */
3083     for (i=1; i<size; i++) {
3084       nz   = procsnz[i];
3085       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3086       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3087     }
3088     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3089   } else {
3090     /* receive numeric values */
3091     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3092 
3093     /* receive message of values*/
3094     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3095 
3096     /* insert into matrix */
3097     jj      = rstart;
3098     smycols = mycols;
3099     svals   = vals;
3100     for (i=0; i<m; i++) {
3101       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3102       smycols += ourlens[i];
3103       svals   += ourlens[i];
3104       jj++;
3105     }
3106   }
3107   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3108   ierr = PetscFree(vals);CHKERRQ(ierr);
3109   ierr = PetscFree(mycols);CHKERRQ(ierr);
3110   ierr = PetscFree(rowners);CHKERRQ(ierr);
3111   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 /* Not scalable because of ISAllGather() unless getting all columns. */
3117 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3118 {
3119   PetscErrorCode ierr;
3120   IS             iscol_local;
3121   PetscBool      isstride;
3122   PetscMPIInt    lisstride=0,gisstride;
3123 
3124   PetscFunctionBegin;
3125   /* check if we are grabbing all columns*/
3126   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3127 
3128   if (isstride) {
3129     PetscInt  start,len,mstart,mlen;
3130     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3131     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3132     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3133     if (mstart == start && mlen-mstart == len) lisstride = 1;
3134   }
3135 
3136   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3137   if (gisstride) {
3138     PetscInt N;
3139     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3140     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3141     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3142     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3143   } else {
3144     PetscInt cbs;
3145     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3146     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3147     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3148   }
3149 
3150   *isseq = iscol_local;
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 /*
3155  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3156  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3157 
3158  Input Parameters:
3159    mat - matrix
3160    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3161            i.e., mat->rstart <= isrow[i] < mat->rend
3162    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3163            i.e., mat->cstart <= iscol[i] < mat->cend
3164  Output Parameter:
3165    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3166    iscol_o - sequential column index set for retrieving mat->B
3167    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3168  */
3169 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3170 {
3171   PetscErrorCode ierr;
3172   Vec            x,cmap;
3173   const PetscInt *is_idx;
3174   PetscScalar    *xarray,*cmaparray;
3175   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3176   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3177   Mat            B=a->B;
3178   Vec            lvec=a->lvec,lcmap;
3179   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3180   MPI_Comm       comm;
3181   VecScatter     Mvctx=a->Mvctx;
3182 
3183   PetscFunctionBegin;
3184   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3185   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3186 
3187   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3188   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3189   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3190   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3191   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3192 
3193   /* Get start indices */
3194   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3195   isstart -= ncols;
3196   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3197 
3198   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3199   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3200   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3201   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3202   for (i=0; i<ncols; i++) {
3203     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3204     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3205     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3206   }
3207   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3208   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3209   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3210 
3211   /* Get iscol_d */
3212   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3213   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3214   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3215 
3216   /* Get isrow_d */
3217   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3218   rstart = mat->rmap->rstart;
3219   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3220   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3221   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3222   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3223 
3224   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3225   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3226   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3227 
3228   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3229   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3230   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3231 
3232   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3233 
3234   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236 
3237   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3238   /* off-process column indices */
3239   count = 0;
3240   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3241   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3242 
3243   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3244   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3245   for (i=0; i<Bn; i++) {
3246     if (PetscRealPart(xarray[i]) > -1.0) {
3247       idx[count]     = i;                   /* local column index in off-diagonal part B */
3248       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3249       count++;
3250     }
3251   }
3252   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3253   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3254 
3255   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3256   /* cannot ensure iscol_o has same blocksize as iscol! */
3257 
3258   ierr = PetscFree(idx);CHKERRQ(ierr);
3259   *garray = cmap1;
3260 
3261   ierr = VecDestroy(&x);CHKERRQ(ierr);
3262   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3263   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3268 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3269 {
3270   PetscErrorCode ierr;
3271   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3272   Mat            M = NULL;
3273   MPI_Comm       comm;
3274   IS             iscol_d,isrow_d,iscol_o;
3275   Mat            Asub = NULL,Bsub = NULL;
3276   PetscInt       n;
3277 
3278   PetscFunctionBegin;
3279   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3280 
3281   if (call == MAT_REUSE_MATRIX) {
3282     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3283     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3284     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3285 
3286     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3287     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3288 
3289     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3290     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3291 
3292     /* Update diagonal and off-diagonal portions of submat */
3293     asub = (Mat_MPIAIJ*)(*submat)->data;
3294     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3295     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3296     if (n) {
3297       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3298     }
3299     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3300     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301 
3302   } else { /* call == MAT_INITIAL_MATRIX) */
3303     const PetscInt *garray;
3304     PetscInt        BsubN;
3305 
3306     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3307     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3308 
3309     /* Create local submatrices Asub and Bsub */
3310     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3311     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3312 
3313     /* Create submatrix M */
3314     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3315 
3316     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3317     asub = (Mat_MPIAIJ*)M->data;
3318 
3319     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3320     n = asub->B->cmap->N;
3321     if (BsubN > n) {
3322       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3323       const PetscInt *idx;
3324       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3325       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3326 
3327       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3328       j = 0;
3329       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3330       for (i=0; i<n; i++) {
3331         if (j >= BsubN) break;
3332         while (subgarray[i] > garray[j]) j++;
3333 
3334         if (subgarray[i] == garray[j]) {
3335           idx_new[i] = idx[j++];
3336         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3337       }
3338       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3339 
3340       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3341       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3342 
3343     } else if (BsubN < n) {
3344       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3345     }
3346 
3347     ierr = PetscFree(garray);CHKERRQ(ierr);
3348     *submat = M;
3349 
3350     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3351     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3352     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3353 
3354     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3355     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3356 
3357     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3358     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3359   }
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3364 {
3365   PetscErrorCode ierr;
3366   IS             iscol_local=NULL,isrow_d;
3367   PetscInt       csize;
3368   PetscInt       n,i,j,start,end;
3369   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3370   MPI_Comm       comm;
3371 
3372   PetscFunctionBegin;
3373   /* If isrow has same processor distribution as mat,
3374      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3375   if (call == MAT_REUSE_MATRIX) {
3376     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3377     if (isrow_d) {
3378       sameRowDist  = PETSC_TRUE;
3379       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3380     } else {
3381       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3382       if (iscol_local) {
3383         sameRowDist  = PETSC_TRUE;
3384         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3385       }
3386     }
3387   } else {
3388     /* Check if isrow has same processor distribution as mat */
3389     sameDist[0] = PETSC_FALSE;
3390     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3391     if (!n) {
3392       sameDist[0] = PETSC_TRUE;
3393     } else {
3394       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3395       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3396       if (i >= start && j < end) {
3397         sameDist[0] = PETSC_TRUE;
3398       }
3399     }
3400 
3401     /* Check if iscol has same processor distribution as mat */
3402     sameDist[1] = PETSC_FALSE;
3403     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3404     if (!n) {
3405       sameDist[1] = PETSC_TRUE;
3406     } else {
3407       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3408       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3409       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3410     }
3411 
3412     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3413     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3414     sameRowDist = tsameDist[0];
3415   }
3416 
3417   if (sameRowDist) {
3418     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3419       /* isrow and iscol have same processor distribution as mat */
3420       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3421       PetscFunctionReturn(0);
3422     } else { /* sameRowDist */
3423       /* isrow has same processor distribution as mat */
3424       if (call == MAT_INITIAL_MATRIX) {
3425         PetscBool sorted;
3426         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3427         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3428         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3429         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3430 
3431         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3432         if (sorted) {
3433           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3434           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3435           PetscFunctionReturn(0);
3436         }
3437       } else { /* call == MAT_REUSE_MATRIX */
3438         IS    iscol_sub;
3439         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3440         if (iscol_sub) {
3441           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3442           PetscFunctionReturn(0);
3443         }
3444       }
3445     }
3446   }
3447 
3448   /* General case: iscol -> iscol_local which has global size of iscol */
3449   if (call == MAT_REUSE_MATRIX) {
3450     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3451     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3452   } else {
3453     if (!iscol_local) {
3454       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3455     }
3456   }
3457 
3458   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3459   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3460 
3461   if (call == MAT_INITIAL_MATRIX) {
3462     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3463     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3464   }
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 /*@C
3469      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3470          and "off-diagonal" part of the matrix in CSR format.
3471 
3472    Collective on MPI_Comm
3473 
3474    Input Parameters:
3475 +  comm - MPI communicator
3476 .  A - "diagonal" portion of matrix
3477 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3478 -  garray - global index of B columns
3479 
3480    Output Parameter:
3481 .   mat - the matrix, with input A as its local diagonal matrix
3482    Level: advanced
3483 
3484    Notes:
3485        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3486        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3487 
3488 .seealso: MatCreateMPIAIJWithSplitArrays()
3489 @*/
3490 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3491 {
3492   PetscErrorCode ierr;
3493   Mat_MPIAIJ     *maij;
3494   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3495   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3496   PetscScalar    *oa=b->a;
3497   Mat            Bnew;
3498   PetscInt       m,n,N;
3499 
3500   PetscFunctionBegin;
3501   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3502   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3503   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3504   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3505   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3506   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3507 
3508   /* Get global columns of mat */
3509   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3510 
3511   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3512   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3513   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3514   maij = (Mat_MPIAIJ*)(*mat)->data;
3515 
3516   (*mat)->preallocated = PETSC_TRUE;
3517 
3518   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3519   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3520 
3521   /* Set A as diagonal portion of *mat */
3522   maij->A = A;
3523 
3524   nz = oi[m];
3525   for (i=0; i<nz; i++) {
3526     col   = oj[i];
3527     oj[i] = garray[col];
3528   }
3529 
3530    /* Set Bnew as off-diagonal portion of *mat */
3531   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3532   bnew        = (Mat_SeqAIJ*)Bnew->data;
3533   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3534   maij->B     = Bnew;
3535 
3536   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3537 
3538   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3539   b->free_a       = PETSC_FALSE;
3540   b->free_ij      = PETSC_FALSE;
3541   ierr = MatDestroy(&B);CHKERRQ(ierr);
3542 
3543   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3544   bnew->free_a       = PETSC_TRUE;
3545   bnew->free_ij      = PETSC_TRUE;
3546 
3547   /* condense columns of maij->B */
3548   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3549   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3550   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3551   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3552   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3557 
3558 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3559 {
3560   PetscErrorCode ierr;
3561   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3562   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3563   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3564   Mat            M,Msub,B=a->B;
3565   MatScalar      *aa;
3566   Mat_SeqAIJ     *aij;
3567   PetscInt       *garray = a->garray,*colsub,Ncols;
3568   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3569   IS             iscol_sub,iscmap;
3570   const PetscInt *is_idx,*cmap;
3571   PetscBool      allcolumns=PETSC_FALSE;
3572   MPI_Comm       comm;
3573 
3574   PetscFunctionBegin;
3575   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3576 
3577   if (call == MAT_REUSE_MATRIX) {
3578     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3579     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3580     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3581 
3582     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3583     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3584 
3585     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3586     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3587 
3588     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3589 
3590   } else { /* call == MAT_INITIAL_MATRIX) */
3591     PetscBool flg;
3592 
3593     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3594     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3595 
3596     /* (1) iscol -> nonscalable iscol_local */
3597     /* Check for special case: each processor gets entire matrix columns */
3598     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3599     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3600     if (allcolumns) {
3601       iscol_sub = iscol_local;
3602       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3603       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3604 
3605     } else {
3606       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3607       PetscInt *idx,*cmap1,k;
3608       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3609       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3610       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3611       count = 0;
3612       k     = 0;
3613       for (i=0; i<Ncols; i++) {
3614         j = is_idx[i];
3615         if (j >= cstart && j < cend) {
3616           /* diagonal part of mat */
3617           idx[count]     = j;
3618           cmap1[count++] = i; /* column index in submat */
3619         } else if (Bn) {
3620           /* off-diagonal part of mat */
3621           if (j == garray[k]) {
3622             idx[count]     = j;
3623             cmap1[count++] = i;  /* column index in submat */
3624           } else if (j > garray[k]) {
3625             while (j > garray[k] && k < Bn-1) k++;
3626             if (j == garray[k]) {
3627               idx[count]     = j;
3628               cmap1[count++] = i; /* column index in submat */
3629             }
3630           }
3631         }
3632       }
3633       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3634 
3635       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3636       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3637       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3638 
3639       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3640     }
3641 
3642     /* (3) Create sequential Msub */
3643     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3644   }
3645 
3646   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3647   aij  = (Mat_SeqAIJ*)(Msub)->data;
3648   ii   = aij->i;
3649   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3650 
3651   /*
3652       m - number of local rows
3653       Ncols - number of columns (same on all processors)
3654       rstart - first row in new global matrix generated
3655   */
3656   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3657 
3658   if (call == MAT_INITIAL_MATRIX) {
3659     /* (4) Create parallel newmat */
3660     PetscMPIInt    rank,size;
3661     PetscInt       csize;
3662 
3663     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3664     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3665 
3666     /*
3667         Determine the number of non-zeros in the diagonal and off-diagonal
3668         portions of the matrix in order to do correct preallocation
3669     */
3670 
3671     /* first get start and end of "diagonal" columns */
3672     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3673     if (csize == PETSC_DECIDE) {
3674       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3675       if (mglobal == Ncols) { /* square matrix */
3676         nlocal = m;
3677       } else {
3678         nlocal = Ncols/size + ((Ncols % size) > rank);
3679       }
3680     } else {
3681       nlocal = csize;
3682     }
3683     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3684     rstart = rend - nlocal;
3685     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3686 
3687     /* next, compute all the lengths */
3688     jj    = aij->j;
3689     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3690     olens = dlens + m;
3691     for (i=0; i<m; i++) {
3692       jend = ii[i+1] - ii[i];
3693       olen = 0;
3694       dlen = 0;
3695       for (j=0; j<jend; j++) {
3696         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3697         else dlen++;
3698         jj++;
3699       }
3700       olens[i] = olen;
3701       dlens[i] = dlen;
3702     }
3703 
3704     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3705     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3706 
3707     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3708     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3709     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3710     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3711     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3712     ierr = PetscFree(dlens);CHKERRQ(ierr);
3713 
3714   } else { /* call == MAT_REUSE_MATRIX */
3715     M    = *newmat;
3716     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3717     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3718     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3719     /*
3720          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3721        rather than the slower MatSetValues().
3722     */
3723     M->was_assembled = PETSC_TRUE;
3724     M->assembled     = PETSC_FALSE;
3725   }
3726 
3727   /* (5) Set values of Msub to *newmat */
3728   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3729   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3730 
3731   jj   = aij->j;
3732   aa   = aij->a;
3733   for (i=0; i<m; i++) {
3734     row = rstart + i;
3735     nz  = ii[i+1] - ii[i];
3736     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3737     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3738     jj += nz; aa += nz;
3739   }
3740   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3741 
3742   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744 
3745   ierr = PetscFree(colsub);CHKERRQ(ierr);
3746 
3747   /* save Msub, iscol_sub and iscmap used in processor for next request */
3748   if (call ==  MAT_INITIAL_MATRIX) {
3749     *newmat = M;
3750     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3751     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3752 
3753     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3754     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3755 
3756     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3757     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3758 
3759     if (iscol_local) {
3760       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3761       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3762     }
3763   }
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 /*
3768     Not great since it makes two copies of the submatrix, first an SeqAIJ
3769   in local and then by concatenating the local matrices the end result.
3770   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3771 
3772   Note: This requires a sequential iscol with all indices.
3773 */
3774 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3775 {
3776   PetscErrorCode ierr;
3777   PetscMPIInt    rank,size;
3778   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3779   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3780   Mat            M,Mreuse;
3781   MatScalar      *aa,*vwork;
3782   MPI_Comm       comm;
3783   Mat_SeqAIJ     *aij;
3784   PetscBool      colflag,allcolumns=PETSC_FALSE;
3785 
3786   PetscFunctionBegin;
3787   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3788   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3789   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3790 
3791   /* Check for special case: each processor gets entire matrix columns */
3792   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3793   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3794   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3795 
3796   if (call ==  MAT_REUSE_MATRIX) {
3797     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3798     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3799     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3800   } else {
3801     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3802   }
3803 
3804   /*
3805       m - number of local rows
3806       n - number of columns (same on all processors)
3807       rstart - first row in new global matrix generated
3808   */
3809   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3810   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3811   if (call == MAT_INITIAL_MATRIX) {
3812     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3813     ii  = aij->i;
3814     jj  = aij->j;
3815 
3816     /*
3817         Determine the number of non-zeros in the diagonal and off-diagonal
3818         portions of the matrix in order to do correct preallocation
3819     */
3820 
3821     /* first get start and end of "diagonal" columns */
3822     if (csize == PETSC_DECIDE) {
3823       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3824       if (mglobal == n) { /* square matrix */
3825         nlocal = m;
3826       } else {
3827         nlocal = n/size + ((n % size) > rank);
3828       }
3829     } else {
3830       nlocal = csize;
3831     }
3832     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3833     rstart = rend - nlocal;
3834     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3835 
3836     /* next, compute all the lengths */
3837     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3838     olens = dlens + m;
3839     for (i=0; i<m; i++) {
3840       jend = ii[i+1] - ii[i];
3841       olen = 0;
3842       dlen = 0;
3843       for (j=0; j<jend; j++) {
3844         if (*jj < rstart || *jj >= rend) olen++;
3845         else dlen++;
3846         jj++;
3847       }
3848       olens[i] = olen;
3849       dlens[i] = dlen;
3850     }
3851     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3852     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3853     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3854     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3855     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3856     ierr = PetscFree(dlens);CHKERRQ(ierr);
3857   } else {
3858     PetscInt ml,nl;
3859 
3860     M    = *newmat;
3861     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3862     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3863     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3864     /*
3865          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3866        rather than the slower MatSetValues().
3867     */
3868     M->was_assembled = PETSC_TRUE;
3869     M->assembled     = PETSC_FALSE;
3870   }
3871   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3872   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3873   ii   = aij->i;
3874   jj   = aij->j;
3875   aa   = aij->a;
3876   for (i=0; i<m; i++) {
3877     row   = rstart + i;
3878     nz    = ii[i+1] - ii[i];
3879     cwork = jj;     jj += nz;
3880     vwork = aa;     aa += nz;
3881     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3882   }
3883 
3884   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   *newmat = M;
3887 
3888   /* save submatrix used in processor for next request */
3889   if (call ==  MAT_INITIAL_MATRIX) {
3890     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3891     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3892   }
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3897 {
3898   PetscInt       m,cstart, cend,j,nnz,i,d;
3899   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3900   const PetscInt *JJ;
3901   PetscScalar    *values;
3902   PetscErrorCode ierr;
3903   PetscBool      nooffprocentries;
3904 
3905   PetscFunctionBegin;
3906   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3907 
3908   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3909   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3910   m      = B->rmap->n;
3911   cstart = B->cmap->rstart;
3912   cend   = B->cmap->rend;
3913   rstart = B->rmap->rstart;
3914 
3915   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3916 
3917 #if defined(PETSC_USE_DEBUG)
3918   for (i=0; i<m && Ii; i++) {
3919     nnz = Ii[i+1]- Ii[i];
3920     JJ  = J + Ii[i];
3921     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3922     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3923     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3924   }
3925 #endif
3926 
3927   for (i=0; i<m && Ii; i++) {
3928     nnz     = Ii[i+1]- Ii[i];
3929     JJ      = J + Ii[i];
3930     nnz_max = PetscMax(nnz_max,nnz);
3931     d       = 0;
3932     for (j=0; j<nnz; j++) {
3933       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3934     }
3935     d_nnz[i] = d;
3936     o_nnz[i] = nnz - d;
3937   }
3938   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3939   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3940 
3941   if (v) values = (PetscScalar*)v;
3942   else {
3943     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3944   }
3945 
3946   for (i=0; i<m && Ii; i++) {
3947     ii   = i + rstart;
3948     nnz  = Ii[i+1]- Ii[i];
3949     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3950   }
3951   nooffprocentries    = B->nooffprocentries;
3952   B->nooffprocentries = PETSC_TRUE;
3953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3955   B->nooffprocentries = nooffprocentries;
3956 
3957   if (!v) {
3958     ierr = PetscFree(values);CHKERRQ(ierr);
3959   }
3960   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963 
3964 /*@
3965    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3966    (the default parallel PETSc format).
3967 
3968    Collective on MPI_Comm
3969 
3970    Input Parameters:
3971 +  B - the matrix
3972 .  i - the indices into j for the start of each local row (starts with zero)
3973 .  j - the column indices for each local row (starts with zero)
3974 -  v - optional values in the matrix
3975 
3976    Level: developer
3977 
3978    Notes:
3979        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3980      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3981      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3982 
3983        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3984 
3985        The format which is used for the sparse matrix input, is equivalent to a
3986     row-major ordering, i.e., for the following matrix, the input data expected is
3987     as shown
3988 
3989 $        1 0 0
3990 $        2 0 3     P0
3991 $       -------
3992 $        4 5 6     P1
3993 $
3994 $     Process0 [P0]: rows_owned=[0,1]
3995 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3996 $        j =  {0,0,2}  [size = 3]
3997 $        v =  {1,2,3}  [size = 3]
3998 $
3999 $     Process1 [P1]: rows_owned=[2]
4000 $        i =  {0,3}    [size = nrow+1  = 1+1]
4001 $        j =  {0,1,2}  [size = 3]
4002 $        v =  {4,5,6}  [size = 3]
4003 
4004 .keywords: matrix, aij, compressed row, sparse, parallel
4005 
4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4007           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4008 @*/
4009 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4010 {
4011   PetscErrorCode ierr;
4012 
4013   PetscFunctionBegin;
  /* Hand the arrays, unchanged, to the implementation registered on B under the name
     "MatMPIAIJSetPreallocationCSR_C". Unlike MatMPIAIJSetPreallocation(), no
     PetscValidHeaderSpecific()/PetscValidType() check is made on B first.
     NOTE(review): PetscTryMethod() presumably does nothing when B has no such method - confirm. */
4014   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 /*@C
4019    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4020    (the default parallel PETSc format).  For good matrix assembly performance
4021    the user should preallocate the matrix storage by setting the parameters
4022    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4023    performance can be increased by more than a factor of 50.
4024 
4025    Collective on MPI_Comm
4026 
4027    Input Parameters:
4028 +  B - the matrix
4029 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4030            (same value is used for all local rows)
4031 .  d_nnz - array containing the number of nonzeros in the various rows of the
4032            DIAGONAL portion of the local submatrix (possibly different for each row)
4033            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4034            The size of this array is equal to the number of local rows, i.e 'm'.
4035            For matrices that will be factored, you must leave room for (and set)
4036            the diagonal entry even if it is zero.
4037 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4038            submatrix (same value is used for all local rows).
4039 -  o_nnz - array containing the number of nonzeros in the various rows of the
4040            OFF-DIAGONAL portion of the local submatrix (possibly different for
4041            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4042            structure. The size of this array is equal to the number
4043            of local rows, i.e 'm'.
4044 
4045    If the *_nnz parameter is given then the *_nz parameter is ignored
4046 
4047    The AIJ format (also called the Yale sparse matrix format or
4048    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4049    storage.  The stored row and column indices begin with zero.
4050    See Users-Manual: ch_mat for details.
4051 
4052    The parallel matrix is partitioned such that the first m0 rows belong to
4053    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4054    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4055 
4056    The DIAGONAL portion of the local submatrix of a processor can be defined
4057    as the submatrix which is obtained by extracting the part corresponding to
4058    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4059    first row that belongs to the processor, r2 is the last row belonging to
4060    this processor, and c1-c2 is the range of indices of the local part of a
4061    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4062    common case of a square matrix, the row and column ranges are the same and
4063    the DIAGONAL part is also square. The remaining portion of the local
4064    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4065 
4066    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4067 
4068    You can call MatGetInfo() to get information on how effective the preallocation was;
4069    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4070    You can also run with the option -info and look for messages with the string
4071    malloc in them to see if additional memory allocation was needed.
4072 
4073    Example usage:
4074 
4075    Consider the following 8x8 matrix with 34 non-zero values, that is
4076    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4077    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4078    as follows:
4079 
4080 .vb
4081             1  2  0  |  0  3  0  |  0  4
4082     Proc0   0  5  6  |  7  0  0  |  8  0
4083             9  0 10  | 11  0  0  | 12  0
4084     -------------------------------------
4085            13  0 14  | 15 16 17  |  0  0
4086     Proc1   0 18  0  | 19 20 21  |  0  0
4087             0  0  0  | 22 23  0  | 24  0
4088     -------------------------------------
4089     Proc2  25 26 27  |  0  0 28  | 29  0
4090            30  0  0  | 31 32 33  |  0 34
4091 .ve
4092 
4093    This can be represented as a collection of submatrices as:
4094 
4095 .vb
4096       A B C
4097       D E F
4098       G H I
4099 .ve
4100 
4101    Where the submatrices A,B,C are owned by proc0, D,E,F are
4102    owned by proc1, G,H,I are owned by proc2.
4103 
4104    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4106    The 'M','N' parameters are 8,8, and have the same values on all procs.
4107 
4108    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4109    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4110    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4111    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4112    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4113    matrix, and [DF] as another SeqAIJ matrix.
4114 
4115    When d_nz, o_nz parameters are specified, d_nz storage elements are
4116    allocated for every row of the local diagonal submatrix, and o_nz
4117    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4118    One way to choose d_nz and o_nz is to use the max nonzeros per local
4119    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4120    In this case, the values of d_nz,o_nz are:
4121 .vb
4122      proc0 : d_nz = 2, o_nz = 2
4123      proc1 : d_nz = 3, o_nz = 2
4124      proc2 : d_nz = 1, o_nz = 4
4125 .ve
4126    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4127    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4128    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4129    34 values.
4130 
4131    When d_nnz, o_nnz parameters are specified, the storage is specified
4132    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4133    In the above case the values for d_nnz,o_nnz are:
4134 .vb
4135      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4136      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4137      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4138 .ve
4139    Here the space allocated is sum of all the above values i.e 34, and
4140    hence pre-allocation is perfect.
4141 
4142    Level: intermediate
4143 
4144 .keywords: matrix, aij, compressed row, sparse, parallel
4145 
4146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4147           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4148 @*/
4149 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4150 {
4151   PetscErrorCode ierr;
4152 
4153   PetscFunctionBegin;
  /* argument checks on B (argument number 1): it must be a Mat object and pass PetscValidType() */
4154   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4155   PetscValidType(B,1);
  /* Hand the counts, unchanged, to the implementation registered on B under the name
     "MatMPIAIJSetPreallocation_C".
     NOTE(review): PetscTryMethod() presumably does nothing when B has no such method - confirm. */
4156   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 /*@
4161      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4162          CSR format the local rows.
4163 
4164    Collective on MPI_Comm
4165 
4166    Input Parameters:
4167 +  comm - MPI communicator
4168 .  m - number of local rows (Cannot be PETSC_DECIDE)
4169 .  n - This value should be the same as the local size used in creating the
4170        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4171        calculated if N is given) For square matrices n is almost always m.
4172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4174 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4175 .   j - column indices
4176 -   a - matrix values
4177 
4178    Output Parameter:
4179 .   mat - the matrix
4180 
4181    Level: intermediate
4182 
4183    Notes:
4184        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4185      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4186      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4187 
4188        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4189 
4190        The format which is used for the sparse matrix input, is equivalent to a
4191     row-major ordering, i.e., for the following matrix, the input data expected is
4192     as shown
4193 
4194 $        1 0 0
4195 $        2 0 3     P0
4196 $       -------
4197 $        4 5 6     P1
4198 $
4199 $     Process0 [P0]: rows_owned=[0,1]
4200 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4201 $        j =  {0,0,2}  [size = 3]
4202 $        a =  {1,2,3}  [size = 3]
4203 $
4204 $     Process1 [P1]: rows_owned=[2]
4205 $        i =  {0,3}    [size = nrow+1  = 1+1]
4206 $        j =  {0,1,2}  [size = 3]
4207 $        a =  {4,5,6}  [size = 3]
4208 
4209 .keywords: matrix, aij, compressed row, sparse, parallel
4210 
4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4212           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4213 @*/
4214 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4215 {
4216   PetscErrorCode ierr;
4217 
4218   PetscFunctionBegin;
  /* both argument checks run before any object is created, so a failure here leaves *mat untouched */
4219   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4220   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4221   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4222   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4223   /* block sizes are not set here: this routine takes no block-size arguments */
4224   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR arrays are passed straight on to MatMPIAIJSetPreallocationCSR() */
4225   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4226   PetscFunctionReturn(0);
4227 }
4228 
4229 /*@C
4230    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4231    (the default parallel PETSc format).  For good matrix assembly performance
4232    the user should preallocate the matrix storage by setting the parameters
4233    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4234    performance can be increased by more than a factor of 50.
4235 
4236    Collective on MPI_Comm
4237 
4238    Input Parameters:
4239 +  comm - MPI communicator
4240 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4241            This value should be the same as the local size used in creating the
4242            y vector for the matrix-vector product y = Ax.
4243 .  n - This value should be the same as the local size used in creating the
4244        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4245        calculated if N is given) For square matrices n is almost always m.
4246 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4247 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4248 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4249            (same value is used for all local rows)
4250 .  d_nnz - array containing the number of nonzeros in the various rows of the
4251            DIAGONAL portion of the local submatrix (possibly different for each row)
4252            or NULL, if d_nz is used to specify the nonzero structure.
4253            The size of this array is equal to the number of local rows, i.e 'm'.
4254 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4255            submatrix (same value is used for all local rows).
4256 -  o_nnz - array containing the number of nonzeros in the various rows of the
4257            OFF-DIAGONAL portion of the local submatrix (possibly different for
4258            each row) or NULL, if o_nz is used to specify the nonzero
4259            structure. The size of this array is equal to the number
4260            of local rows, i.e 'm'.
4261 
4262    Output Parameter:
4263 .  A - the matrix
4264 
4265    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4267    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4268 
4269    Notes:
4270    If the *_nnz parameter is given then the *_nz parameter is ignored
4271 
4272    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4273    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4274    storage requirements for this matrix.
4275 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4279 
4280    The user MUST specify either the local or global matrix dimensions
4281    (possibly both).
4282 
4283    The parallel matrix is partitioned across processors such that the
4284    first m0 rows belong to process 0, the next m1 rows belong to
4285    process 1, the next m2 rows belong to process 2 etc.. where
4286    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4287    values corresponding to [m x N] submatrix.
4288 
4289    The columns are logically partitioned with the n0 columns belonging
4290    to 0th partition, the next n1 columns belonging to the next
4291    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4292 
4293    The DIAGONAL portion of the local submatrix on any given processor
4294    is the submatrix corresponding to the rows and columns m,n
4295    corresponding to the given processor. i.e diagonal matrix on
4296    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4297    etc. The remaining portion of the local submatrix [m x (N-n)]
4298    constitute the OFF-DIAGONAL portion. The example below better
4299    illustrates this concept.
4300 
4301    For a square global matrix we define each processor's diagonal portion
4302    to be its local rows and the corresponding columns (a square submatrix);
4303    each processor's off-diagonal portion encompasses the remainder of the
4304    local matrix (a rectangular submatrix).
4305 
4306    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4307 
4308    When calling this routine with a single process communicator, a matrix of
4309    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4310    type of communicator, use the construction mechanism
4311 .vb
4312      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4313 .ve
4314 
4315 $     MatCreate(...,&A);
4316 $     MatSetType(A,MATMPIAIJ);
4317 $     MatSetSizes(A, m,n,M,N);
4318 $     MatMPIAIJSetPreallocation(A,...);
4319 
4320    By default, this format uses inodes (identical nodes) when possible.
4321    We search for consecutive rows with the same nonzero structure, thereby
4322    reusing matrix information to achieve increased efficiency.
4323 
4324    Options Database Keys:
4325 +  -mat_no_inode  - Do not use inodes
4326 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4327 
4328 
4329 
4330    Example usage:
4331 
4332    Consider the following 8x8 matrix with 34 non-zero values, that is
4333    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4334    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4335    as follows
4336 
4337 .vb
4338             1  2  0  |  0  3  0  |  0  4
4339     Proc0   0  5  6  |  7  0  0  |  8  0
4340             9  0 10  | 11  0  0  | 12  0
4341     -------------------------------------
4342            13  0 14  | 15 16 17  |  0  0
4343     Proc1   0 18  0  | 19 20 21  |  0  0
4344             0  0  0  | 22 23  0  | 24  0
4345     -------------------------------------
4346     Proc2  25 26 27  |  0  0 28  | 29  0
4347            30  0  0  | 31 32 33  |  0 34
4348 .ve
4349 
4350    This can be represented as a collection of submatrices as
4351 
4352 .vb
4353       A B C
4354       D E F
4355       G H I
4356 .ve
4357 
4358    Where the submatrices A,B,C are owned by proc0, D,E,F are
4359    owned by proc1, G,H,I are owned by proc2.
4360 
4361    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4362    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4363    The 'M','N' parameters are 8,8, and have the same values on all procs.
4364 
4365    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4366    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4367    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4368    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4371 
4372    When d_nz, o_nz parameters are specified, d_nz storage elements are
4373    allocated for every row of the local diagonal submatrix, and o_nz
4374    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4376    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4377    In this case, the values of d_nz,o_nz are
4378 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4382 .ve
4383    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4386    34 values.
4387 
4388    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4390    In the above case the values for d_nnz,o_nnz are
4391 .vb
4392      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4393      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4394      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4395 .ve
4396    Here the space allocated is sum of all the above values i.e 34, and
4397    hence pre-allocation is perfect.
4398 
4399    Level: intermediate
4400 
4401 .keywords: matrix, aij, compressed row, sparse, parallel
4402 
4403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4404           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4405 @*/
4406 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4407 {
4408   PetscErrorCode ierr;
4409   PetscMPIInt    size;
4410 
4411   PetscFunctionBegin;
4412   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4413   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415   if (size > 1) {
4416     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4417     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4418   } else {
4419     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4420     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4421   }
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4426 {
4427   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4428   PetscBool      flg;
4429   PetscErrorCode ierr;
4430 
4431   PetscFunctionBegin;
4432   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4433   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4434   if (Ad)     *Ad     = a->A;
4435   if (Ao)     *Ao     = a->B;
4436   if (colmap) *colmap = a->garray;
4437   PetscFunctionReturn(0);
4438 }
4439 
4440 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4441 {
4442   PetscErrorCode ierr;
4443   PetscInt       m,N,i,rstart,nnz,Ii;
4444   PetscInt       *indx;
4445   PetscScalar    *values;
4446 
4447   PetscFunctionBegin;
4448   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4449   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4450     PetscInt       *dnz,*onz,sum,bs,cbs;
4451 
4452     if (n == PETSC_DECIDE) {
4453       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4454     }
4455     /* Check sum(n) = N */
4456     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4457     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4458 
4459     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4460     rstart -= m;
4461 
4462     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4463     for (i=0; i<m; i++) {
4464       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4465       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4466       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4467     }
4468 
4469     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4470     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4471     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4472     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4473     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4474     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4475     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4476     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4477   }
4478 
4479   /* numeric phase */
4480   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4481   for (i=0; i<m; i++) {
4482     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4483     Ii   = i + rstart;
4484     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4485     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4486   }
4487   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4493 {
4494   PetscErrorCode    ierr;
4495   PetscMPIInt       rank;
4496   PetscInt          m,N,i,rstart,nnz;
4497   size_t            len;
4498   const PetscInt    *indx;
4499   PetscViewer       out;
4500   char              *name;
4501   Mat               B;
4502   const PetscScalar *values;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4506   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4507   /* Should this be the type of the diagonal block of A? */
4508   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4509   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4510   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4511   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4512   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4513   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4514   for (i=0; i<m; i++) {
4515     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4517     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4518   }
4519   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521 
4522   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4523   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4525   sprintf(name,"%s.%d",outfile,rank);
4526   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4527   ierr = PetscFree(name);CHKERRQ(ierr);
4528   ierr = MatView(B,out);CHKERRQ(ierr);
4529   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4530   ierr = MatDestroy(&B);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4535 {
4536   PetscErrorCode      ierr;
4537   Mat_Merge_SeqsToMPI *merge;
4538   PetscContainer      container;
4539 
4540   PetscFunctionBegin;
4541   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4542   if (container) {
4543     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4556     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4557     ierr = PetscFree(merge);CHKERRQ(ierr);
4558     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4559   }
4560   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 #include <../src/mat/utils/freespace.h>
4565 #include <petscbt.h>
4566 
4567 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4568 {
4569   PetscErrorCode      ierr;
4570   MPI_Comm            comm;
4571   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4572   PetscMPIInt         size,rank,taga,*len_s;
4573   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4574   PetscInt            proc,m;
4575   PetscInt            **buf_ri,**buf_rj;
4576   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4577   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4578   MPI_Request         *s_waits,*r_waits;
4579   MPI_Status          *status;
4580   MatScalar           *aa=a->a;
4581   MatScalar           **abuf_r,*ba_i;
4582   Mat_Merge_SeqsToMPI *merge;
4583   PetscContainer      container;
4584 
4585   PetscFunctionBegin;
4586   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4587   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4588 
4589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4590   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4591 
4592   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4593   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4594 
4595   bi     = merge->bi;
4596   bj     = merge->bj;
4597   buf_ri = merge->buf_ri;
4598   buf_rj = merge->buf_rj;
4599 
4600   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4601   owners = merge->rowmap->range;
4602   len_s  = merge->len_s;
4603 
4604   /* send and recv matrix values */
4605   /*-----------------------------*/
4606   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4607   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4608 
4609   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4610   for (proc=0,k=0; proc<size; proc++) {
4611     if (!len_s[proc]) continue;
4612     i    = owners[proc];
4613     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4614     k++;
4615   }
4616 
4617   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4618   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4619   ierr = PetscFree(status);CHKERRQ(ierr);
4620 
4621   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4622   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4623 
4624   /* insert mat values of mpimat */
4625   /*----------------------------*/
4626   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4627   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4628 
4629   for (k=0; k<merge->nrecv; k++) {
4630     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4631     nrows       = *(buf_ri_k[k]);
4632     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4633     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4634   }
4635 
4636   /* set values of ba */
4637   m = merge->rowmap->n;
4638   for (i=0; i<m; i++) {
4639     arow = owners[rank] + i;
4640     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4641     bnzi = bi[i+1] - bi[i];
4642     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4643 
4644     /* add local non-zero vals of this proc's seqmat into ba */
4645     anzi   = ai[arow+1] - ai[arow];
4646     aj     = a->j + ai[arow];
4647     aa     = a->a + ai[arow];
4648     nextaj = 0;
4649     for (j=0; nextaj<anzi; j++) {
4650       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4651         ba_i[j] += aa[nextaj++];
4652       }
4653     }
4654 
4655     /* add received vals into ba */
4656     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4657       /* i-th row */
4658       if (i == *nextrow[k]) {
4659         anzi   = *(nextai[k]+1) - *nextai[k];
4660         aj     = buf_rj[k] + *(nextai[k]);
4661         aa     = abuf_r[k] + *(nextai[k]);
4662         nextaj = 0;
4663         for (j=0; nextaj<anzi; j++) {
4664           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4665             ba_i[j] += aa[nextaj++];
4666           }
4667         }
4668         nextrow[k]++; nextai[k]++;
4669       }
4670     }
4671     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4672   }
4673   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4674   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675 
4676   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4677   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4678   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4679   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4680   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4681   PetscFunctionReturn(0);
4682 }
4683 
4684 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4685 {
4686   PetscErrorCode      ierr;
4687   Mat                 B_mpi;
4688   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4689   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4690   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4691   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4692   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4693   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4694   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4695   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4696   MPI_Status          *status;
4697   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4698   PetscBT             lnkbt;
4699   Mat_Merge_SeqsToMPI *merge;
4700   PetscContainer      container;
4701 
4702   PetscFunctionBegin;
4703   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4704 
4705   /* make sure it is a PETSc comm */
4706   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4707   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4709 
4710   ierr = PetscNew(&merge);CHKERRQ(ierr);
4711   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4712 
4713   /* determine row ownership */
4714   /*---------------------------------------------------------*/
4715   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4717   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4718   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4719   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4720   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4721   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4722 
4723   m      = merge->rowmap->n;
4724   owners = merge->rowmap->range;
4725 
4726   /* determine the number of messages to send, their lengths */
4727   /*---------------------------------------------------------*/
4728   len_s = merge->len_s;
4729 
4730   len          = 0; /* length of buf_si[] */
4731   merge->nsend = 0;
4732   for (proc=0; proc<size; proc++) {
4733     len_si[proc] = 0;
4734     if (proc == rank) {
4735       len_s[proc] = 0;
4736     } else {
4737       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4738       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4739     }
4740     if (len_s[proc]) {
4741       merge->nsend++;
4742       nrows = 0;
4743       for (i=owners[proc]; i<owners[proc+1]; i++) {
4744         if (ai[i+1] > ai[i]) nrows++;
4745       }
4746       len_si[proc] = 2*(nrows+1);
4747       len         += len_si[proc];
4748     }
4749   }
4750 
4751   /* determine the number and length of messages to receive for ij-structure */
4752   /*-------------------------------------------------------------------------*/
4753   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4754   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4755 
4756   /* post the Irecv of j-structure */
4757   /*-------------------------------*/
4758   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4759   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4760 
4761   /* post the Isend of j-structure */
4762   /*--------------------------------*/
4763   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4764 
4765   for (proc=0, k=0; proc<size; proc++) {
4766     if (!len_s[proc]) continue;
4767     i    = owners[proc];
4768     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4769     k++;
4770   }
4771 
4772   /* receives and sends of j-structure are complete */
4773   /*------------------------------------------------*/
4774   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4775   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4776 
4777   /* send and recv i-structure */
4778   /*---------------------------*/
4779   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4780   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4781 
4782   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4783   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4784   for (proc=0,k=0; proc<size; proc++) {
4785     if (!len_s[proc]) continue;
4786     /* form outgoing message for i-structure:
4787          buf_si[0]:                 nrows to be sent
4788                [1:nrows]:           row index (global)
4789                [nrows+1:2*nrows+1]: i-structure index
4790     */
4791     /*-------------------------------------------*/
4792     nrows       = len_si[proc]/2 - 1;
4793     buf_si_i    = buf_si + nrows+1;
4794     buf_si[0]   = nrows;
4795     buf_si_i[0] = 0;
4796     nrows       = 0;
4797     for (i=owners[proc]; i<owners[proc+1]; i++) {
4798       anzi = ai[i+1] - ai[i];
4799       if (anzi) {
4800         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4801         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4802         nrows++;
4803       }
4804     }
4805     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4806     k++;
4807     buf_si += len_si[proc];
4808   }
4809 
4810   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4811   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4812 
4813   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4814   for (i=0; i<merge->nrecv; i++) {
4815     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4816   }
4817 
4818   ierr = PetscFree(len_si);CHKERRQ(ierr);
4819   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4820   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4821   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4822   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4823   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4824   ierr = PetscFree(status);CHKERRQ(ierr);
4825 
4826   /* compute a local seq matrix in each processor */
4827   /*----------------------------------------------*/
4828   /* allocate bi array and free space for accumulating nonzero column info */
4829   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4830   bi[0] = 0;
4831 
4832   /* create and initialize a linked list */
4833   nlnk = N+1;
4834   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4835 
4836   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4837   len  = ai[owners[rank+1]] - ai[owners[rank]];
4838   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4839 
4840   current_space = free_space;
4841 
4842   /* determine symbolic info for each local row */
4843   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4844 
4845   for (k=0; k<merge->nrecv; k++) {
4846     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4847     nrows       = *buf_ri_k[k];
4848     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4849     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4850   }
4851 
4852   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4853   len  = 0;
4854   for (i=0; i<m; i++) {
4855     bnzi = 0;
4856     /* add local non-zero cols of this proc's seqmat into lnk */
4857     arow  = owners[rank] + i;
4858     anzi  = ai[arow+1] - ai[arow];
4859     aj    = a->j + ai[arow];
4860     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4861     bnzi += nlnk;
4862     /* add received col data into lnk */
4863     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4864       if (i == *nextrow[k]) { /* i-th row */
4865         anzi  = *(nextai[k]+1) - *nextai[k];
4866         aj    = buf_rj[k] + *nextai[k];
4867         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4868         bnzi += nlnk;
4869         nextrow[k]++; nextai[k]++;
4870       }
4871     }
4872     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4873 
4874     /* if free space is not available, make more free space */
4875     if (current_space->local_remaining<bnzi) {
4876       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4877       nspacedouble++;
4878     }
4879     /* copy data into free space, then initialize lnk */
4880     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4881     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4882 
4883     current_space->array           += bnzi;
4884     current_space->local_used      += bnzi;
4885     current_space->local_remaining -= bnzi;
4886 
4887     bi[i+1] = bi[i] + bnzi;
4888   }
4889 
4890   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4891 
4892   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4893   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4894   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4895 
4896   /* create symbolic parallel matrix B_mpi */
4897   /*---------------------------------------*/
4898   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4899   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4900   if (n==PETSC_DECIDE) {
4901     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4902   } else {
4903     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4904   }
4905   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4906   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4907   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4908   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4909   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4910 
4911   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4912   B_mpi->assembled    = PETSC_FALSE;
4913   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4914   merge->bi           = bi;
4915   merge->bj           = bj;
4916   merge->buf_ri       = buf_ri;
4917   merge->buf_rj       = buf_rj;
4918   merge->coi          = NULL;
4919   merge->coj          = NULL;
4920   merge->owners_co    = NULL;
4921 
4922   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4923 
4924   /* attach the supporting struct to B_mpi for reuse */
4925   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4926   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4927   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4928   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4929   *mpimat = B_mpi;
4930 
4931   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 /*@C
4936       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4937                  matrices from each processor
4938 
4939     Collective on MPI_Comm
4940 
4941    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4943 .    seqmat - the input sequential matrices
4944 .    m - number of local rows (or PETSC_DECIDE)
4945 .    n - number of local columns (or PETSC_DECIDE)
4946 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 
4948    Output Parameter:
4949 .    mpimat - the parallel matrix generated
4950 
4951     Level: advanced
4952 
4953    Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     On communicators with more than one process, the merge data needed for MAT_REUSE_MATRIX is
     attached to mpimat in a container named "MatMergeSeqsToMPI" and is freed when mpimat is
     destroyed. The input seqmat is not stored in that container; the caller remains responsible
     for destroying it.
4957 @*/
4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4959 {
4960   PetscErrorCode ierr;
4961   PetscMPIInt    size;
4962 
4963   PetscFunctionBegin;
4964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4965   if (size == 1) {
4966     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967     if (scall == MAT_INITIAL_MATRIX) {
4968       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4969     } else {
4970       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4971     }
4972     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4973     PetscFunctionReturn(0);
4974   }
4975   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4976   if (scall == MAT_INITIAL_MATRIX) {
4977     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4978   }
4979   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4980   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981   PetscFunctionReturn(0);
4982 }
4983 
4984 /*@
4985      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4986           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4987           with MatGetSize()
4988 
4989     Not Collective
4990 
4991    Input Parameters:
4992 +    A - the matrix
4993 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4994 
4995    Output Parameter:
4996 .    A_loc - the local sequential matrix generated
4997 
4998     Level: developer
4999 
5000 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5001 
5002 @*/
5003 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5004 {
5005   PetscErrorCode ierr;
5006   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5007   Mat_SeqAIJ     *mat,*a,*b;
5008   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5009   MatScalar      *aa,*ba,*cam;
5010   PetscScalar    *ca;
5011   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5012   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5013   PetscBool      match;
5014   MPI_Comm       comm;
5015   PetscMPIInt    size;
5016 
5017   PetscFunctionBegin;
5018   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5019   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5020   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5022   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5023 
5024   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5025   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5026   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5027   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5028   aa = a->a; ba = b->a;
5029   if (scall == MAT_INITIAL_MATRIX) {
5030     if (size == 1) {
5031       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5032       PetscFunctionReturn(0);
5033     }
5034 
5035     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5036     ci[0] = 0;
5037     for (i=0; i<am; i++) {
5038       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5039     }
5040     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5041     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5042     k    = 0;
5043     for (i=0; i<am; i++) {
5044       ncols_o = bi[i+1] - bi[i];
5045       ncols_d = ai[i+1] - ai[i];
5046       /* off-diagonal portion of A */
5047       for (jo=0; jo<ncols_o; jo++) {
5048         col = cmap[*bj];
5049         if (col >= cstart) break;
5050         cj[k]   = col; bj++;
5051         ca[k++] = *ba++;
5052       }
5053       /* diagonal portion of A */
5054       for (j=0; j<ncols_d; j++) {
5055         cj[k]   = cstart + *aj++;
5056         ca[k++] = *aa++;
5057       }
5058       /* off-diagonal portion of A */
5059       for (j=jo; j<ncols_o; j++) {
5060         cj[k]   = cmap[*bj++];
5061         ca[k++] = *ba++;
5062       }
5063     }
5064     /* put together the new matrix */
5065     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5066     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5067     /* Since these are PETSc arrays, change flags to free them as necessary. */
5068     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5069     mat->free_a  = PETSC_TRUE;
5070     mat->free_ij = PETSC_TRUE;
5071     mat->nonew   = 0;
5072   } else if (scall == MAT_REUSE_MATRIX) {
5073     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5074     ci = mat->i; cj = mat->j; cam = mat->a;
5075     for (i=0; i<am; i++) {
5076       /* off-diagonal portion of A */
5077       ncols_o = bi[i+1] - bi[i];
5078       for (jo=0; jo<ncols_o; jo++) {
5079         col = cmap[*bj];
5080         if (col >= cstart) break;
5081         *cam++ = *ba++; bj++;
5082       }
5083       /* diagonal portion of A */
5084       ncols_d = ai[i+1] - ai[i];
5085       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5086       /* off-diagonal portion of A */
5087       for (j=jo; j<ncols_o; j++) {
5088         *cam++ = *ba++; bj++;
5089       }
5090     }
5091   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5092   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
5095 
5096 /*@C
5097      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    row, col - index sets of rows and columns to extract (or NULL)
5105 
5106    Output Parameter:
5107 .    A_loc - the local sequential matrix generated
5108 
5109     Level: developer
5110 
5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5112 
5113 @*/
5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5115 {
5116   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5117   PetscErrorCode ierr;
5118   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5119   IS             isrowa,iscola;
5120   Mat            *aloc;
5121   PetscBool      match;
5122 
5123   PetscFunctionBegin;
5124   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5125   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5126   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   if (!row) {
5128     start = A->rmap->rstart; end = A->rmap->rend;
5129     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5130   } else {
5131     isrowa = *row;
5132   }
5133   if (!col) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5148   } else {
5149     iscola = *col;
5150   }
5151   if (scall != MAT_INITIAL_MATRIX) {
5152     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5153     aloc[0] = *A_loc;
5154   }
5155   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5156   if (!col) { /* attach global id of condensed columns */
5157     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5158   }
5159   *A_loc = aloc[0];
5160   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5161   if (!row) {
5162     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5163   }
5164   if (!col) {
5165     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5166   }
5167   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5168   PetscFunctionReturn(0);
5169 }
5170 
5171 /*@C
5172     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5173 
5174     Collective on Mat
5175 
5176    Input Parameters:
5177 +    A,B - the matrices in mpiaij format
5178 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5179 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5180 
5181    Output Parameter:
5182 +    rowb, colb - index sets of rows and columns of B to extract
5183 -    B_seq - the sequential matrix generated
5184 
5185     Level: developer
5186 
5187 @*/
5188 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5189 {
5190   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5191   PetscErrorCode ierr;
5192   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5193   IS             isrowb,iscolb;
5194   Mat            *bseq=NULL;
5195 
5196   PetscFunctionBegin;
5197   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5198     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5199   }
5200   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5201 
5202   if (scall == MAT_INITIAL_MATRIX) {
5203     start = A->cmap->rstart;
5204     cmap  = a->garray;
5205     nzA   = a->A->cmap->n;
5206     nzB   = a->B->cmap->n;
5207     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5208     ncols = 0;
5209     for (i=0; i<nzB; i++) {  /* row < local row index */
5210       if (cmap[i] < start) idx[ncols++] = cmap[i];
5211       else break;
5212     }
5213     imark = i;
5214     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5215     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5216     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5217     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5218   } else {
5219     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5220     isrowb  = *rowb; iscolb = *colb;
5221     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5222     bseq[0] = *B_seq;
5223   }
5224   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5225   *B_seq = bseq[0];
5226   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5227   if (!rowb) {
5228     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5229   } else {
5230     *rowb = isrowb;
5231   }
5232   if (!colb) {
5233     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5234   } else {
5235     *colb = iscolb;
5236   }
5237   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5238   PetscFunctionReturn(0);
5239 }
5240 
5241 #include <petsc/private/vecscatterimpl.h>
5242 /*
5243     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5244     of the OFF-DIAGONAL portion of local A
5245 
5246     Collective on Mat
5247 
5248    Input Parameters:
5249 +    A,B - the matrices in mpiaij format
5250 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5251 
5252    Output Parameter:
5253 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5254 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5255 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5256 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5257 
5258     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5259      for this matrix. This is not desirable..
5260 
5261     Level: developer
5262 
5263 */
5264 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5265 {
5266   VecScatter_MPI_General *gen_to,*gen_from;
5267   PetscErrorCode         ierr;
5268   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5269   Mat_SeqAIJ             *b_oth;
5270   VecScatter             ctx;
5271   MPI_Comm               comm;
5272   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5273   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5274   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5275   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5276   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5277   MPI_Request            *rwaits = NULL,*swaits = NULL;
5278   MPI_Status             *sstatus,rstatus;
5279   PetscMPIInt            jj,size;
5280   VecScatterType         type;
5281   PetscBool              mpi1;
5282 
5283   PetscFunctionBegin;
5284   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5286 
5287   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5288     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5289   }
5290   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5291   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5292 
5293   if (size == 1) {
5294     startsj_s = NULL;
5295     bufa_ptr  = NULL;
5296     *B_oth    = NULL;
5297     PetscFunctionReturn(0);
5298   }
5299 
5300   ctx = a->Mvctx;
5301   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5302   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5303   if (!mpi1) {
5304     /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops,
5305      thus create a->Mvctx_mpi1 */
5306     if (!a->Mvctx_mpi1) {
5307       a->Mvctx_mpi1_flg = PETSC_TRUE;
5308       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5309     }
5310     ctx = a->Mvctx_mpi1;
5311   }
5312   tag = ((PetscObject)ctx)->tag;
5313 
5314   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5315   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5316   nrecvs   = gen_from->n;
5317   nsends   = gen_to->n;
5318 
5319   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5320   srow    = gen_to->indices;    /* local row index to be sent */
5321   sstarts = gen_to->starts;
5322   sprocs  = gen_to->procs;
5323   sstatus = gen_to->sstatus;
5324   sbs     = gen_to->bs;
5325   rstarts = gen_from->starts;
5326   rprocs  = gen_from->procs;
5327   rbs     = gen_from->bs;
5328 
5329   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5330   if (scall == MAT_INITIAL_MATRIX) {
5331     /* i-array */
5332     /*---------*/
5333     /*  post receives */
5334     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5335     for (i=0; i<nrecvs; i++) {
5336       rowlen = rvalues + rstarts[i]*rbs;
5337       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5338       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5339     }
5340 
5341     /* pack the outgoing message */
5342     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5343 
5344     sstartsj[0] = 0;
5345     rstartsj[0] = 0;
5346     len         = 0; /* total length of j or a array to be sent */
5347     k           = 0;
5348     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5349     for (i=0; i<nsends; i++) {
5350       rowlen = svalues + sstarts[i]*sbs;
5351       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5352       for (j=0; j<nrows; j++) {
5353         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5354         for (l=0; l<sbs; l++) {
5355           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5356 
5357           rowlen[j*sbs+l] = ncols;
5358 
5359           len += ncols;
5360           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5361         }
5362         k++;
5363       }
5364       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5365 
5366       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5367     }
5368     /* recvs and sends of i-array are completed */
5369     i = nrecvs;
5370     while (i--) {
5371       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5372     }
5373     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5374     ierr = PetscFree(svalues);CHKERRQ(ierr);
5375 
5376     /* allocate buffers for sending j and a arrays */
5377     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5378     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5379 
5380     /* create i-array of B_oth */
5381     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5382 
5383     b_othi[0] = 0;
5384     len       = 0; /* total length of j or a array to be received */
5385     k         = 0;
5386     for (i=0; i<nrecvs; i++) {
5387       rowlen = rvalues + rstarts[i]*rbs;
5388       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5389       for (j=0; j<nrows; j++) {
5390         b_othi[k+1] = b_othi[k] + rowlen[j];
5391         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5392         k++;
5393       }
5394       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5395     }
5396     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5397 
5398     /* allocate space for j and a arrrays of B_oth */
5399     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5400     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5401 
5402     /* j-array */
5403     /*---------*/
5404     /*  post receives of j-array */
5405     for (i=0; i<nrecvs; i++) {
5406       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5407       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5408     }
5409 
5410     /* pack the outgoing message j-array */
5411     k = 0;
5412     for (i=0; i<nsends; i++) {
5413       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5414       bufJ  = bufj+sstartsj[i];
5415       for (j=0; j<nrows; j++) {
5416         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5417         for (ll=0; ll<sbs; ll++) {
5418           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5419           for (l=0; l<ncols; l++) {
5420             *bufJ++ = cols[l];
5421           }
5422           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5423         }
5424       }
5425       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5426     }
5427 
5428     /* recvs and sends of j-array are completed */
5429     i = nrecvs;
5430     while (i--) {
5431       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5432     }
5433     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5434   } else if (scall == MAT_REUSE_MATRIX) {
5435     sstartsj = *startsj_s;
5436     rstartsj = *startsj_r;
5437     bufa     = *bufa_ptr;
5438     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5439     b_otha   = b_oth->a;
5440   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5441 
5442   /* a-array */
5443   /*---------*/
5444   /*  post receives of a-array */
5445   for (i=0; i<nrecvs; i++) {
5446     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5447     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5448   }
5449 
5450   /* pack the outgoing message a-array */
5451   k = 0;
5452   for (i=0; i<nsends; i++) {
5453     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5454     bufA  = bufa+sstartsj[i];
5455     for (j=0; j<nrows; j++) {
5456       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5457       for (ll=0; ll<sbs; ll++) {
5458         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5459         for (l=0; l<ncols; l++) {
5460           *bufA++ = vals[l];
5461         }
5462         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5463       }
5464     }
5465     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5466   }
5467   /* recvs and sends of a-array are completed */
5468   i = nrecvs;
5469   while (i--) {
5470     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5471   }
5472   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5473   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5474 
5475   if (scall == MAT_INITIAL_MATRIX) {
5476     /* put together the new matrix */
5477     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5478 
5479     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5480     /* Since these are PETSc arrays, change flags to free them as necessary. */
5481     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5482     b_oth->free_a  = PETSC_TRUE;
5483     b_oth->free_ij = PETSC_TRUE;
5484     b_oth->nonew   = 0;
5485 
5486     ierr = PetscFree(bufj);CHKERRQ(ierr);
5487     if (!startsj_s || !bufa_ptr) {
5488       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5489       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5490     } else {
5491       *startsj_s = sstartsj;
5492       *startsj_r = rstartsj;
5493       *bufa_ptr  = bufa;
5494     }
5495   }
5496   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5497   PetscFunctionReturn(0);
5498 }
5499 
5500 /*@C
5501   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5502 
5503   Not Collective
5504 
5505   Input Parameters:
5506 . A - The matrix in mpiaij format
5507 
5508   Output Parameter:
5509 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5510 . colmap - A map from global column index to local index into lvec
5511 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5512 
5513   Level: developer
5514 
5515 @*/
5516 #if defined(PETSC_USE_CTABLE)
5517 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5518 #else
5519 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5520 #endif
5521 {
5522   Mat_MPIAIJ *a;
5523 
5524   PetscFunctionBegin;
5525   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5526   PetscValidPointer(lvec, 2);
5527   PetscValidPointer(colmap, 3);
5528   PetscValidPointer(multScatter, 4);
5529   a = (Mat_MPIAIJ*) A->data;
5530   if (lvec) *lvec = a->lvec;
5531   if (colmap) *colmap = a->colmap;
5532   if (multScatter) *multScatter = a->Mvctx;
5533   PetscFunctionReturn(0);
5534 }
5535 
5536 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5539 #if defined(PETSC_HAVE_MKL_SPARSE)
5540 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5541 #endif
5542 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5543 #if defined(PETSC_HAVE_ELEMENTAL)
5544 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5545 #endif
5546 #if defined(PETSC_HAVE_HYPRE)
5547 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5548 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5549 #endif
5550 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5551 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5552 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5553 
5554 /*
5555     Computes (B'*A')' since computing B*A directly is untenable
5556 
5557                n                       p                          p
5558         (              )       (              )         (                  )
5559       m (      A       )  *  n (       B      )   =   m (         C        )
5560         (              )       (              )         (                  )
5561 
5562 */
5563 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5564 {
5565   PetscErrorCode ierr;
5566   Mat            At,Bt,Ct;
5567 
5568   PetscFunctionBegin;
5569   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5570   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5571   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5572   ierr = MatDestroy(&At);CHKERRQ(ierr);
5573   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5574   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5575   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5576   PetscFunctionReturn(0);
5577 }
5578 
5579 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5580 {
5581   PetscErrorCode ierr;
5582   PetscInt       m=A->rmap->n,n=B->cmap->n;
5583   Mat            Cmat;
5584 
5585   PetscFunctionBegin;
5586   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5587   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5588   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5589   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5590   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5591   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5592   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5593   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5594 
5595   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5596 
5597   *C = Cmat;
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 /* ----------------------------------------------------------------*/
5602 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5603 {
5604   PetscErrorCode ierr;
5605 
5606   PetscFunctionBegin;
5607   if (scall == MAT_INITIAL_MATRIX) {
5608     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5609     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5610     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5611   }
5612   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5613   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5614   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
5617 
5618 /*MC
5619    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5620 
5621    Options Database Keys:
5622 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5623 
5624   Level: beginner
5625 
5626 .seealso: MatCreateAIJ()
5627 M*/
5628 
5629 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5630 {
5631   Mat_MPIAIJ     *b;
5632   PetscErrorCode ierr;
5633   PetscMPIInt    size;
5634 
5635   PetscFunctionBegin;
5636   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5637 
5638   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5639   B->data       = (void*)b;
5640   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5641   B->assembled  = PETSC_FALSE;
5642   B->insertmode = NOT_SET_VALUES;
5643   b->size       = size;
5644 
5645   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5646 
5647   /* build cache for off array entries formed */
5648   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5649 
5650   b->donotstash  = PETSC_FALSE;
5651   b->colmap      = 0;
5652   b->garray      = 0;
5653   b->roworiented = PETSC_TRUE;
5654 
5655   /* stuff used for matrix vector multiply */
5656   b->lvec  = NULL;
5657   b->Mvctx = NULL;
5658 
5659   /* stuff for MatGetRow() */
5660   b->rowindices   = 0;
5661   b->rowvalues    = 0;
5662   b->getrowactive = PETSC_FALSE;
5663 
5664   /* flexible pointer used in CUSP/CUSPARSE classes */
5665   b->spptr = NULL;
5666 
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5677 #if defined(PETSC_HAVE_MKL_SPARSE)
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5679 #endif
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5682 #if defined(PETSC_HAVE_ELEMENTAL)
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5684 #endif
5685 #if defined(PETSC_HAVE_HYPRE)
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5687 #endif
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5693 #if defined(PETSC_HAVE_HYPRE)
5694   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5695 #endif
5696   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5697   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5698   PetscFunctionReturn(0);
5699 }
5700 
5701 /*@C
5702      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5703          and "off-diagonal" part of the matrix in CSR format.
5704 
5705    Collective on MPI_Comm
5706 
5707    Input Parameters:
5708 +  comm - MPI communicator
5709 .  m - number of local rows (Cannot be PETSC_DECIDE)
5710 .  n - This value should be the same as the local size used in creating the
5711        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5712        calculated if N is given) For square matrices n is almost always m.
5713 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5714 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5715 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5716 .   j - column indices
5717 .   a - matrix values
5718 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5719 .   oj - column indices
5720 -   oa - matrix values
5721 
5722    Output Parameter:
5723 .   mat - the matrix
5724 
5725    Level: advanced
5726 
5727    Notes:
5728        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5729        must free the arrays once the matrix has been destroyed and not before.
5730 
5731        The i and j indices are 0 based
5732 
5733        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5734 
5735        This sets local rows and cannot be used to set off-processor values.
5736 
5737        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5738        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5739        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5740        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5741        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5742        communication if it is known that only local entries will be set.
5743 
5744 .keywords: matrix, aij, compressed row, sparse, parallel
5745 
5746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5747           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5748 @*/
5749 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5750 {
5751   PetscErrorCode ierr;
5752   Mat_MPIAIJ     *maij;
5753 
5754   PetscFunctionBegin;
5755   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5756   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5757   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5758   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5759   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5760   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5761   maij = (Mat_MPIAIJ*) (*mat)->data;
5762 
5763   (*mat)->preallocated = PETSC_TRUE;
5764 
5765   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5766   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5767 
5768   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5769   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5770 
5771   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5775 
5776   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5777   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5778   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5779   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5780   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5781   PetscFunctionReturn(0);
5782 }
5783 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/*
   Change these macros so can be used in void function.

   From this point on in the file, CHKERRQ() expands to CHKERRABORT() on PETSC_COMM_WORLD, and the
   SETERRQ(), SETERRQ2() and SETERRQ3() variants expand to CHKERRABORT() on the given communicator
   with only the error code; their message and format arguments are dropped by these definitions.
*/
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

/*
   Select the external symbol name for the routine below: upper case when PETSC_HAVE_FORTRAN_CAPS is
   defined, lower case without the trailing underscore when PETSC_HAVE_FORTRAN_UNDERSCORE is not
   defined, and otherwise the name as written (lower case with a trailing underscore).
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
5805 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5806 {
5807   Mat            mat  = *mmat;
5808   PetscInt       m    = *mm, n = *mn;
5809   InsertMode     addv = *maddv;
5810   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5811   PetscScalar    value;
5812   PetscErrorCode ierr;
5813 
5814   MatCheckPreallocated(mat,1);
5815   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5816 
5817 #if defined(PETSC_USE_DEBUG)
5818   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5819 #endif
5820   {
5821     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5822     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5823     PetscBool roworiented = aij->roworiented;
5824 
5825     /* Some Variables required in the macro */
5826     Mat        A                 = aij->A;
5827     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5828     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5829     MatScalar  *aa               = a->a;
5830     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5831     Mat        B                 = aij->B;
5832     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5833     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5834     MatScalar  *ba               = b->a;
5835 
5836     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5837     PetscInt  nonew = a->nonew;
5838     MatScalar *ap1,*ap2;
5839 
5840     PetscFunctionBegin;
5841     for (i=0; i<m; i++) {
5842       if (im[i] < 0) continue;
5843 #if defined(PETSC_USE_DEBUG)
5844       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5845 #endif
5846       if (im[i] >= rstart && im[i] < rend) {
5847         row      = im[i] - rstart;
5848         lastcol1 = -1;
5849         rp1      = aj + ai[row];
5850         ap1      = aa + ai[row];
5851         rmax1    = aimax[row];
5852         nrow1    = ailen[row];
5853         low1     = 0;
5854         high1    = nrow1;
5855         lastcol2 = -1;
5856         rp2      = bj + bi[row];
5857         ap2      = ba + bi[row];
5858         rmax2    = bimax[row];
5859         nrow2    = bilen[row];
5860         low2     = 0;
5861         high2    = nrow2;
5862 
5863         for (j=0; j<n; j++) {
5864           if (roworiented) value = v[i*n+j];
5865           else value = v[i+j*m];
5866           if (in[j] >= cstart && in[j] < cend) {
5867             col = in[j] - cstart;
5868             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5869             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5870           } else if (in[j] < 0) continue;
5871 #if defined(PETSC_USE_DEBUG)
5872           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5873           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5874 #endif
5875           else {
5876             if (mat->was_assembled) {
5877               if (!aij->colmap) {
5878                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5879               }
5880 #if defined(PETSC_USE_CTABLE)
5881               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5882               col--;
5883 #else
5884               col = aij->colmap[in[j]] - 1;
5885 #endif
5886               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5887               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5888                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5889                 col  =  in[j];
5890                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5891                 B     = aij->B;
5892                 b     = (Mat_SeqAIJ*)B->data;
5893                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5894                 rp2   = bj + bi[row];
5895                 ap2   = ba + bi[row];
5896                 rmax2 = bimax[row];
5897                 nrow2 = bilen[row];
5898                 low2  = 0;
5899                 high2 = nrow2;
5900                 bm    = aij->B->rmap->n;
5901                 ba    = b->a;
5902               }
5903             } else col = in[j];
5904             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5905           }
5906         }
5907       } else if (!aij->donotstash) {
5908         if (roworiented) {
5909           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5910         } else {
5911           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5912         }
5913       }
5914     }
5915   }
5916   PetscFunctionReturnVoid();
5917 }
5918