xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4099cc6bb216ea1ce0fb658a0f1ca74722a27153)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
22    enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line, */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   /* do nondiagonal part */
1048   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1049   /* send it on its way */
1050   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   /* do local part */
1052   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1053   /* receive remote parts */
1054   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*
1059   This only works correctly for square matrices where the subblock A->A is the
1060    diagonal block
1061 */
1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1063 {
1064   PetscErrorCode ierr;
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066 
1067   PetscFunctionBegin;
1068   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1069   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1070   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1075 {
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1081   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1086 {
1087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091 #if defined(PETSC_USE_LOG)
1092   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1093 #endif
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1096   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1098 #if defined(PETSC_USE_CTABLE)
1099   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1100 #else
1101   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1102 #endif
1103   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1105   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1106   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1107   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1108   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1109 
1110   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1111   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1118 #if defined(PETSC_HAVE_ELEMENTAL)
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1120 #endif
1121 #if defined(PETSC_HAVE_HYPRE)
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1124 #endif
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1129 {
1130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1131   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1132   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1133   PetscErrorCode ierr;
1134   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1135   int            fd;
1136   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1137   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1138   PetscScalar    *column_values;
1139   PetscInt       message_count,flowcontrolcount;
1140   FILE           *file;
1141 
1142   PetscFunctionBegin;
1143   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1144   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1145   nz   = A->nz + B->nz;
1146   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1147   if (!rank) {
1148     header[0] = MAT_FILE_CLASSID;
1149     header[1] = mat->rmap->N;
1150     header[2] = mat->cmap->N;
1151 
1152     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1153     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1154     /* get largest number of rows any processor has */
1155     rlen  = mat->rmap->n;
1156     range = mat->rmap->range;
1157     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1158   } else {
1159     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     rlen = mat->rmap->n;
1161   }
1162 
1163   /* load up the local row counts */
1164   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1165   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1166 
1167   /* store the row lengths to the file */
1168   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1169   if (!rank) {
1170     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     for (i=1; i<size; i++) {
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       rlen = range[i+1] - range[i];
1174       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1178   } else {
1179     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1180     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1184 
1185   /* load up the local column indices */
1186   nzmax = nz; /* th processor needs space a largest processor needs */
1187   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1189   cnt   = 0;
1190   for (i=0; i<mat->rmap->n; i++) {
1191     for (j=B->i[i]; j<B->i[i+1]; j++) {
1192       if ((col = garray[B->j[j]]) > cstart) break;
1193       column_indices[cnt++] = col;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1196     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1197   }
1198   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1199 
1200   /* store the column indices to the file */
1201   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1202   if (!rank) {
1203     MPI_Status status;
1204     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1208       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1209       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1218   }
1219   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1220 
1221   /* load up the local column values */
1222   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1223   cnt  = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if (garray[B->j[j]] > cstart) break;
1227       column_values[cnt++] = B->a[j];
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1230     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column values to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1243       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_values);CHKERRQ(ierr);
1254 
1255   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1256   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include <petscdraw.h>
1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode    ierr;
1265   PetscMPIInt       rank = aij->rank,size = aij->size;
1266   PetscBool         isdraw,iascii,isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii) {
1275     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1276     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1277       MatInfo   info;
1278       PetscBool inodes;
1279 
1280       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1281       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1282       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1283       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1284       if (!inodes) {
1285         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1286                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1287       } else {
1288         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1289                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1290       }
1291       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1292       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1293       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1295       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1296       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1298       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1299       PetscFunctionReturn(0);
1300     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1301       PetscInt inodecount,inodelimit,*inodes;
1302       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1303       if (inodes) {
1304         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1305       } else {
1306         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1307       }
1308       PetscFunctionReturn(0);
1309     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1310       PetscFunctionReturn(0);
1311     }
1312   } else if (isbinary) {
1313     if (size == 1) {
1314       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1315       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1316     } else {
1317       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318     }
1319     PetscFunctionReturn(0);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1324     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1325     if (isnull) PetscFunctionReturn(0);
1326   }
1327 
1328   {
1329     /* assemble the entire matrix onto first processor. */
1330     Mat        A;
1331     Mat_SeqAIJ *Aloc;
1332     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1333     MatScalar  *a;
1334 
1335     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1336     if (!rank) {
1337       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1340     }
1341     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1342     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1343     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1344     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1345     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1346 
1347     /* copy over the A part */
1348     Aloc = (Mat_SeqAIJ*)aij->A->data;
1349     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1350     row  = mat->rmap->rstart;
1351     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1352     for (i=0; i<m; i++) {
1353       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1354       row++;
1355       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1356     }
1357     aj = Aloc->j;
1358     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1359 
1360     /* copy over the B part */
1361     Aloc = (Mat_SeqAIJ*)aij->B->data;
1362     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1363     row  = mat->rmap->rstart;
1364     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1365     ct   = cols;
1366     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1367     for (i=0; i<m; i++) {
1368       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1369       row++;
1370       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1371     }
1372     ierr = PetscFree(ct);CHKERRQ(ierr);
1373     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     /*
1376        Everyone has to call to draw the matrix since the graphics waits are
1377        synchronized across all processors that share the PetscDraw object
1378     */
1379     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1380     if (!rank) {
1381       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1383     }
1384     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1385     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1386     ierr = MatDestroy(&A);CHKERRQ(ierr);
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1392 {
1393   PetscErrorCode ierr;
1394   PetscBool      iascii,isdraw,issocket,isbinary;
1395 
1396   PetscFunctionBegin;
1397   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1401   if (iascii || isdraw || isbinary || issocket) {
1402     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1408 {
1409   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1410   PetscErrorCode ierr;
1411   Vec            bb1 = 0;
1412   PetscBool      hasop;
1413 
1414   PetscFunctionBegin;
1415   if (flag == SOR_APPLY_UPPER) {
1416     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1417     PetscFunctionReturn(0);
1418   }
1419 
1420   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1421     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1422   }
1423 
1424   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1425     if (flag & SOR_ZERO_INITIAL_GUESS) {
1426       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1427       its--;
1428     }
1429 
1430     while (its--) {
1431       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1432       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1433 
1434       /* update rhs: bb1 = bb - B*x */
1435       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1436       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1437 
1438       /* local sweep */
1439       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1440     }
1441   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1444       its--;
1445     }
1446     while (its--) {
1447       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449 
1450       /* update rhs: bb1 = bb - B*x */
1451       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1452       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1453 
1454       /* local sweep */
1455       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1456     }
1457   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1458     if (flag & SOR_ZERO_INITIAL_GUESS) {
1459       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460       its--;
1461     }
1462     while (its--) {
1463       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465 
1466       /* update rhs: bb1 = bb - B*x */
1467       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1468       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1469 
1470       /* local sweep */
1471       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1472     }
1473   } else if (flag & SOR_EISENSTAT) {
1474     Vec xx1;
1475 
1476     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1477     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1478 
1479     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481     if (!mat->diag) {
1482       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1483       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1484     }
1485     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1486     if (hasop) {
1487       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1488     } else {
1489       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1490     }
1491     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1492 
1493     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1494 
1495     /* local sweep */
1496     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1497     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1498     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1499   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1500 
1501   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1502 
1503   matin->factorerrortype = mat->A->factorerrortype;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1508 {
1509   Mat            aA,aB,Aperm;
1510   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1511   PetscScalar    *aa,*ba;
1512   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1513   PetscSF        rowsf,sf;
1514   IS             parcolp = NULL;
1515   PetscBool      done;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1520   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1521   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1522   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1523 
1524   /* Invert row permutation to find out where my rows should go */
1525   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1526   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1527   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1528   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1529   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1531 
1532   /* Invert column permutation to find out where my columns should go */
1533   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1534   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1535   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1536   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1537   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1538   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1540 
1541   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1542   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1543   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1544 
1545   /* Find out where my gcols should go */
1546   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1547   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1548   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1549   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1550   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1551   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1552   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1553   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1554 
1555   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1556   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1557   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) {
1559     PetscInt row = rdest[i],rowner;
1560     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1561     for (j=ai[i]; j<ai[i+1]; j++) {
1562       PetscInt cowner,col = cdest[aj[j]];
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567     for (j=bi[i]; j<bi[i+1]; j++) {
1568       PetscInt cowner,col = gcdest[bj[j]];
1569       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1570       if (rowner == cowner) dnnz[i]++;
1571       else onnz[i]++;
1572     }
1573   }
1574   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1575   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1579 
1580   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1581   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1582   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1585     PetscInt j0,rowlen;
1586     rowlen = ai[i+1] - ai[i];
1587     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1588       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1589       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1590     }
1591     rowlen = bi[i+1] - bi[i];
1592     for (j0=j=0; j<rowlen; j0=j) {
1593       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1594       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1595     }
1596   }
1597   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1598   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1600   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1601   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1602   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1603   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1604   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1605   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1606   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1607   *B = Aperm;
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1612 {
1613   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1618   if (ghosts) *ghosts = aij->garray;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   Mat            A    = mat->A,B = mat->B;
1626   PetscErrorCode ierr;
1627   PetscReal      isend[5],irecv[5];
1628 
1629   PetscFunctionBegin;
1630   info->block_size = 1.0;
1631   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1634   isend[3] = info->memory;  isend[4] = info->mallocs;
1635 
1636   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1637 
1638   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1639   isend[3] += info->memory;  isend[4] += info->mallocs;
1640   if (flag == MAT_LOCAL) {
1641     info->nz_used      = isend[0];
1642     info->nz_allocated = isend[1];
1643     info->nz_unneeded  = isend[2];
1644     info->memory       = isend[3];
1645     info->mallocs      = isend[4];
1646   } else if (flag == MAT_GLOBAL_MAX) {
1647     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   } else if (flag == MAT_GLOBAL_SUM) {
1655     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1656 
1657     info->nz_used      = irecv[0];
1658     info->nz_allocated = irecv[1];
1659     info->nz_unneeded  = irecv[2];
1660     info->memory       = irecv[3];
1661     info->mallocs      = irecv[4];
1662   }
1663   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1664   info->fill_ratio_needed = 0;
1665   info->factor_mallocs    = 0;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1670 {
1671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   switch (op) {
1676   case MAT_NEW_NONZERO_LOCATIONS:
1677   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1678   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1679   case MAT_KEEP_NONZERO_PATTERN:
1680   case MAT_NEW_NONZERO_LOCATION_ERR:
1681   case MAT_USE_INODES:
1682   case MAT_IGNORE_ZERO_ENTRIES:
1683     MatCheckPreallocated(A,1);
1684     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1685     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_ROW_ORIENTED:
1688     MatCheckPreallocated(A,1);
1689     a->roworiented = flg;
1690 
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1693     break;
1694   case MAT_NEW_DIAGONALS:
1695     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1696     break;
1697   case MAT_IGNORE_OFF_PROC_ENTRIES:
1698     a->donotstash = flg;
1699     break;
1700   case MAT_SPD:
1701     A->spd_set = PETSC_TRUE;
1702     A->spd     = flg;
1703     if (flg) {
1704       A->symmetric                  = PETSC_TRUE;
1705       A->structurally_symmetric     = PETSC_TRUE;
1706       A->symmetric_set              = PETSC_TRUE;
1707       A->structurally_symmetric_set = PETSC_TRUE;
1708     }
1709     break;
1710   case MAT_SYMMETRIC:
1711     MatCheckPreallocated(A,1);
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715     MatCheckPreallocated(A,1);
1716     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_HERMITIAN:
1719     MatCheckPreallocated(A,1);
1720     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_SYMMETRY_ETERNAL:
1723     MatCheckPreallocated(A,1);
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   case MAT_STRUCTURE_ONLY:
1730     /* The option is handled directly by MatSetOption() */
1731     break;
1732   default:
1733     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1739 {
1740   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1741   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1742   PetscErrorCode ierr;
1743   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1744   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1745   PetscInt       *cmap,*idx_p;
1746 
1747   PetscFunctionBegin;
1748   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1749   mat->getrowactive = PETSC_TRUE;
1750 
1751   if (!mat->rowvalues && (idx || v)) {
1752     /*
1753         allocate enough space to hold information from the longest row.
1754     */
1755     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1756     PetscInt   max = 1,tmp;
1757     for (i=0; i<matin->rmap->n; i++) {
1758       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1759       if (max < tmp) max = tmp;
1760     }
1761     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1762   }
1763 
1764   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1765   lrow = row - rstart;
1766 
1767   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1768   if (!v)   {pvA = 0; pvB = 0;}
1769   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1770   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1771   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1772   nztot = nzA + nzB;
1773 
1774   cmap = mat->garray;
1775   if (v  || idx) {
1776     if (nztot) {
1777       /* Sort by increasing column numbers, assuming A and B already sorted */
1778       PetscInt imark = -1;
1779       if (v) {
1780         *v = v_p = mat->rowvalues;
1781         for (i=0; i<nzB; i++) {
1782           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1783           else break;
1784         }
1785         imark = i;
1786         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1787         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1788       }
1789       if (idx) {
1790         *idx = idx_p = mat->rowindices;
1791         if (imark > -1) {
1792           for (i=0; i<imark; i++) {
1793             idx_p[i] = cmap[cworkB[i]];
1794           }
1795         } else {
1796           for (i=0; i<nzB; i++) {
1797             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1798             else break;
1799           }
1800           imark = i;
1801         }
1802         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1803         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1804       }
1805     } else {
1806       if (idx) *idx = 0;
1807       if (v)   *v   = 0;
1808     }
1809   }
1810   *nz  = nztot;
1811   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1812   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1817 {
1818   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1819 
1820   PetscFunctionBegin;
1821   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1822   aij->getrowactive = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1827 {
1828   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1829   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1830   PetscErrorCode ierr;
1831   PetscInt       i,j,cstart = mat->cmap->rstart;
1832   PetscReal      sum = 0.0;
1833   MatScalar      *v;
1834 
1835   PetscFunctionBegin;
1836   if (aij->size == 1) {
1837     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1838   } else {
1839     if (type == NORM_FROBENIUS) {
1840       v = amat->a;
1841       for (i=0; i<amat->nz; i++) {
1842         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1843       }
1844       v = bmat->a;
1845       for (i=0; i<bmat->nz; i++) {
1846         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1847       }
1848       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1849       *norm = PetscSqrtReal(*norm);
1850       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1851     } else if (type == NORM_1) { /* max column norm */
1852       PetscReal *tmp,*tmp2;
1853       PetscInt  *jj,*garray = aij->garray;
1854       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1855       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1856       *norm = 0.0;
1857       v     = amat->a; jj = amat->j;
1858       for (j=0; j<amat->nz; j++) {
1859         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1860       }
1861       v = bmat->a; jj = bmat->j;
1862       for (j=0; j<bmat->nz; j++) {
1863         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1864       }
1865       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1866       for (j=0; j<mat->cmap->N; j++) {
1867         if (tmp2[j] > *norm) *norm = tmp2[j];
1868       }
1869       ierr = PetscFree(tmp);CHKERRQ(ierr);
1870       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1871       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1872     } else if (type == NORM_INFINITY) { /* max row norm */
1873       PetscReal ntemp = 0.0;
1874       for (j=0; j<aij->A->rmap->n; j++) {
1875         v   = amat->a + amat->i[j];
1876         sum = 0.0;
1877         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1878           sum += PetscAbsScalar(*v); v++;
1879         }
1880         v = bmat->a + bmat->i[j];
1881         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1882           sum += PetscAbsScalar(*v); v++;
1883         }
1884         if (sum > ntemp) ntemp = sum;
1885       }
1886       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1888     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1894 {
1895   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1896   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1897   PetscErrorCode ierr;
1898   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1899   PetscInt       cstart = A->cmap->rstart,ncol;
1900   Mat            B;
1901   MatScalar      *array;
1902 
1903   PetscFunctionBegin;
1904   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1905 
1906   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1907   ai = Aloc->i; aj = Aloc->j;
1908   bi = Bloc->i; bj = Bloc->j;
1909   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1910     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1911     PetscSFNode          *oloc;
1912     PETSC_UNUSED PetscSF sf;
1913 
1914     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1915     /* compute d_nnz for preallocation */
1916     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1917     for (i=0; i<ai[ma]; i++) {
1918       d_nnz[aj[i]]++;
1919       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1920     }
1921     /* compute local off-diagonal contributions */
1922     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1923     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1924     /* map those to global */
1925     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1926     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1927     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1928     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1929     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1930     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1931     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1932 
1933     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1934     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1935     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1936     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1937     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1938     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1939   } else {
1940     B    = *matout;
1941     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1942     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1943   }
1944 
1945   /* copy over the A part */
1946   array = Aloc->a;
1947   row   = A->rmap->rstart;
1948   for (i=0; i<ma; i++) {
1949     ncol = ai[i+1]-ai[i];
1950     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1951     row++;
1952     array += ncol; aj += ncol;
1953   }
1954   aj = Aloc->j;
1955   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1956 
1957   /* copy over the B part */
1958   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1959   array = Bloc->a;
1960   row   = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1966     row++;
1967     array += ncol; cols_tmp += ncol;
1968   }
1969   ierr = PetscFree(cols);CHKERRQ(ierr);
1970 
1971   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1974     *matout = B;
1975   } else {
1976     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1982 {
1983   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1984   Mat            a    = aij->A,b = aij->B;
1985   PetscErrorCode ierr;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1990   if (rr) {
1991     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1992     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1995   }
1996   if (ll) {
1997     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1998     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2000   }
2001   /* scale  the diagonal block */
2002   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2007     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2023 {
2024   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2025   Mat            a,b,c,d;
2026   PetscBool      flg;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   a = matA->A; b = matA->B;
2031   c = matB->A; d = matB->B;
2032 
2033   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2034   if (flg) {
2035     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2036   }
2037   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2042 {
2043   PetscErrorCode ierr;
2044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2045   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2046 
2047   PetscFunctionBegin;
2048   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2049   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2050     /* because of the column compression in the off-processor part of the matrix a->B,
2051        the number of columns in a->B and b->B may be different, hence we cannot call
2052        the MatCopy() directly on the two parts. If need be, we can provide a more
2053        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2054        then copying the submatrices */
2055     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2056   } else {
2057     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2058     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2059   }
2060   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2065 {
2066   PetscErrorCode ierr;
2067 
2068   PetscFunctionBegin;
2069   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 /*
2074    Computes the number of nonzeros per row needed for preallocation when X and Y
2075    have different nonzero structure.
2076 */
2077 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2078 {
2079   PetscInt       i,j,k,nzx,nzy;
2080 
2081   PetscFunctionBegin;
2082   /* Set the number of nonzeros in the new matrix */
2083   for (i=0; i<m; i++) {
2084     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2085     nzx = xi[i+1] - xi[i];
2086     nzy = yi[i+1] - yi[i];
2087     nnz[i] = 0;
2088     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2089       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2090       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2091       nnz[i]++;
2092     }
2093     for (; k<nzy; k++) nnz[i]++;
2094   }
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2099 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2100 {
2101   PetscErrorCode ierr;
2102   PetscInt       m = Y->rmap->N;
2103   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2104   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2105 
2106   PetscFunctionBegin;
2107   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2112 {
2113   PetscErrorCode ierr;
2114   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2115   PetscBLASInt   bnz,one=1;
2116   Mat_SeqAIJ     *x,*y;
2117 
2118   PetscFunctionBegin;
2119   if (str == SAME_NONZERO_PATTERN) {
2120     PetscScalar alpha = a;
2121     x    = (Mat_SeqAIJ*)xx->A->data;
2122     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2123     y    = (Mat_SeqAIJ*)yy->A->data;
2124     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2125     x    = (Mat_SeqAIJ*)xx->B->data;
2126     y    = (Mat_SeqAIJ*)yy->B->data;
2127     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2128     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2129     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2130   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2131     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2132   } else {
2133     Mat      B;
2134     PetscInt *nnz_d,*nnz_o;
2135     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2136     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2137     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2138     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2139     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2140     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2141     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2142     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2143     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2145     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2146     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2147     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2148     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2154 
2155 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2156 {
2157 #if defined(PETSC_USE_COMPLEX)
2158   PetscErrorCode ierr;
2159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2160 
2161   PetscFunctionBegin;
2162   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2163   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2164 #else
2165   PetscFunctionBegin;
2166 #endif
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2171 {
2172   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2177   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2182 {
2183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2188   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196   PetscInt       i,*idxb = 0;
2197   PetscScalar    *va,*vb;
2198   Vec            vtmp;
2199 
2200   PetscFunctionBegin;
2201   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2202   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2203   if (idx) {
2204     for (i=0; i<A->rmap->n; i++) {
2205       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2206     }
2207   }
2208 
2209   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2210   if (idx) {
2211     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2212   }
2213   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2214   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2215 
2216   for (i=0; i<A->rmap->n; i++) {
2217     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2218       va[i] = vb[i];
2219       if (idx) idx[i] = a->garray[idxb[i]];
2220     }
2221   }
2222 
2223   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2224   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2225   ierr = PetscFree(idxb);CHKERRQ(ierr);
2226   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2231 {
2232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2233   PetscErrorCode ierr;
2234   PetscInt       i,*idxb = 0;
2235   PetscScalar    *va,*vb;
2236   Vec            vtmp;
2237 
2238   PetscFunctionBegin;
2239   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2240   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2241   if (idx) {
2242     for (i=0; i<A->cmap->n; i++) {
2243       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2244     }
2245   }
2246 
2247   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2248   if (idx) {
2249     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2250   }
2251   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2252   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2253 
2254   for (i=0; i<A->rmap->n; i++) {
2255     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2256       va[i] = vb[i];
2257       if (idx) idx[i] = a->garray[idxb[i]];
2258     }
2259   }
2260 
2261   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2262   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2263   ierr = PetscFree(idxb);CHKERRQ(ierr);
2264   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2269 {
2270   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2271   PetscInt       n      = A->rmap->n;
2272   PetscInt       cstart = A->cmap->rstart;
2273   PetscInt       *cmap  = mat->garray;
2274   PetscInt       *diagIdx, *offdiagIdx;
2275   Vec            diagV, offdiagV;
2276   PetscScalar    *a, *diagA, *offdiagA;
2277   PetscInt       r;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2282   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2283   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2284   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2285   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2286   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2287   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2288   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2289   for (r = 0; r < n; ++r) {
2290     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2291       a[r]   = diagA[r];
2292       idx[r] = cstart + diagIdx[r];
2293     } else {
2294       a[r]   = offdiagA[r];
2295       idx[r] = cmap[offdiagIdx[r]];
2296     }
2297   }
2298   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2300   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2301   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2302   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2303   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2308 {
2309   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2310   PetscInt       n      = A->rmap->n;
2311   PetscInt       cstart = A->cmap->rstart;
2312   PetscInt       *cmap  = mat->garray;
2313   PetscInt       *diagIdx, *offdiagIdx;
2314   Vec            diagV, offdiagV;
2315   PetscScalar    *a, *diagA, *offdiagA;
2316   PetscInt       r;
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2321   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2322   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2323   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2324   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2326   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2327   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2328   for (r = 0; r < n; ++r) {
2329     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2330       a[r]   = diagA[r];
2331       idx[r] = cstart + diagIdx[r];
2332     } else {
2333       a[r]   = offdiagA[r];
2334       idx[r] = cmap[offdiagIdx[r]];
2335     }
2336   }
2337   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2339   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2340   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2341   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2342   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2347 {
2348   PetscErrorCode ierr;
2349   Mat            *dummy;
2350 
2351   PetscFunctionBegin;
2352   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2353   *newmat = *dummy;
2354   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2359 {
2360   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2361   PetscErrorCode ierr;
2362 
2363   PetscFunctionBegin;
2364   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2365   A->factorerrortype = a->A->factorerrortype;
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2370 {
2371   PetscErrorCode ierr;
2372   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2376   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2377   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2378   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2383 {
2384   PetscFunctionBegin;
2385   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2386   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 /*@
2391    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2392 
2393    Collective on Mat
2394 
2395    Input Parameters:
2396 +    A - the matrix
2397 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2398 
2399  Level: advanced
2400 
2401 @*/
2402 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2403 {
2404   PetscErrorCode       ierr;
2405 
2406   PetscFunctionBegin;
2407   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2412 {
2413   PetscErrorCode       ierr;
2414   PetscBool            sc = PETSC_FALSE,flg;
2415   Mat_MPIAIJ           *maij = (Mat_MPIAIJ*)A->data;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2419   ierr = PetscObjectOptionsBegin((PetscObject)A);
2420     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2421     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2422     if (flg) {
2423       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2424     }
2425     if (maij->A) {ierr = MatSetFromOptions(maij->A);CHKERRQ(ierr);}
2426     if (maij->A) {ierr = MatSetFromOptions(maij->B);CHKERRQ(ierr);}
2427   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2432 {
2433   PetscErrorCode ierr;
2434   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2435   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2436 
2437   PetscFunctionBegin;
2438   if (!Y->preallocated) {
2439     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2440   } else if (!aij->nz) {
2441     PetscInt nonew = aij->nonew;
2442     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2443     aij->nonew = nonew;
2444   }
2445   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2450 {
2451   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2452   PetscErrorCode ierr;
2453 
2454   PetscFunctionBegin;
2455   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2456   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2457   if (d) {
2458     PetscInt rstart;
2459     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2460     *d += rstart;
2461 
2462   }
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 
2467 /* -------------------------------------------------------------------*/
2468 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2469                                        MatGetRow_MPIAIJ,
2470                                        MatRestoreRow_MPIAIJ,
2471                                        MatMult_MPIAIJ,
2472                                 /* 4*/ MatMultAdd_MPIAIJ,
2473                                        MatMultTranspose_MPIAIJ,
2474                                        MatMultTransposeAdd_MPIAIJ,
2475                                        0,
2476                                        0,
2477                                        0,
2478                                 /*10*/ 0,
2479                                        0,
2480                                        0,
2481                                        MatSOR_MPIAIJ,
2482                                        MatTranspose_MPIAIJ,
2483                                 /*15*/ MatGetInfo_MPIAIJ,
2484                                        MatEqual_MPIAIJ,
2485                                        MatGetDiagonal_MPIAIJ,
2486                                        MatDiagonalScale_MPIAIJ,
2487                                        MatNorm_MPIAIJ,
2488                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2489                                        MatAssemblyEnd_MPIAIJ,
2490                                        MatSetOption_MPIAIJ,
2491                                        MatZeroEntries_MPIAIJ,
2492                                 /*24*/ MatZeroRows_MPIAIJ,
2493                                        0,
2494                                        0,
2495                                        0,
2496                                        0,
2497                                 /*29*/ MatSetUp_MPIAIJ,
2498                                        0,
2499                                        0,
2500                                        MatGetDiagonalBlock_MPIAIJ,
2501                                        0,
2502                                 /*34*/ MatDuplicate_MPIAIJ,
2503                                        0,
2504                                        0,
2505                                        0,
2506                                        0,
2507                                 /*39*/ MatAXPY_MPIAIJ,
2508                                        MatCreateSubMatrices_MPIAIJ,
2509                                        MatIncreaseOverlap_MPIAIJ,
2510                                        MatGetValues_MPIAIJ,
2511                                        MatCopy_MPIAIJ,
2512                                 /*44*/ MatGetRowMax_MPIAIJ,
2513                                        MatScale_MPIAIJ,
2514                                        MatShift_MPIAIJ,
2515                                        MatDiagonalSet_MPIAIJ,
2516                                        MatZeroRowsColumns_MPIAIJ,
2517                                 /*49*/ MatSetRandom_MPIAIJ,
2518                                        0,
2519                                        0,
2520                                        0,
2521                                        0,
2522                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2523                                        0,
2524                                        MatSetUnfactored_MPIAIJ,
2525                                        MatPermute_MPIAIJ,
2526                                        0,
2527                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2528                                        MatDestroy_MPIAIJ,
2529                                        MatView_MPIAIJ,
2530                                        0,
2531                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2532                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2533                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2538                                        MatGetRowMinAbs_MPIAIJ,
2539                                        0,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*75*/ MatFDColoringApply_AIJ,
2544                                        MatSetFromOptions_MPIAIJ,
2545                                        0,
2546                                        0,
2547                                        MatFindZeroDiagonals_MPIAIJ,
2548                                 /*80*/ 0,
2549                                        0,
2550                                        0,
2551                                 /*83*/ MatLoad_MPIAIJ,
2552                                        0,
2553                                        0,
2554                                        0,
2555                                        0,
2556                                        0,
2557                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2558                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2559                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2560                                        MatPtAP_MPIAIJ_MPIAIJ,
2561                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2562                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2563                                        0,
2564                                        0,
2565                                        0,
2566                                        0,
2567                                 /*99*/ 0,
2568                                        0,
2569                                        0,
2570                                        MatConjugate_MPIAIJ,
2571                                        0,
2572                                 /*104*/MatSetValuesRow_MPIAIJ,
2573                                        MatRealPart_MPIAIJ,
2574                                        MatImaginaryPart_MPIAIJ,
2575                                        0,
2576                                        0,
2577                                 /*109*/0,
2578                                        0,
2579                                        MatGetRowMin_MPIAIJ,
2580                                        0,
2581                                        MatMissingDiagonal_MPIAIJ,
2582                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2583                                        0,
2584                                        MatGetGhosts_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                 /*119*/0,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                        MatGetMultiProcBlock_MPIAIJ,
2592                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2593                                        MatGetColumnNorms_MPIAIJ,
2594                                        MatInvertBlockDiagonal_MPIAIJ,
2595                                        0,
2596                                        MatCreateSubMatricesMPI_MPIAIJ,
2597                                 /*129*/0,
2598                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2599                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2600                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2601                                        0,
2602                                 /*134*/0,
2603                                        0,
2604                                        MatRARt_MPIAIJ_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                 /*139*/MatSetBlockSizes_MPIAIJ,
2608                                        0,
2609                                        0,
2610                                        MatFDColoringSetUp_MPIXAIJ,
2611                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2612                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2613 };
2614 
2615 /* ----------------------------------------------------------------------------------------*/
2616 
2617 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2618 {
2619   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2620   PetscErrorCode ierr;
2621 
2622   PetscFunctionBegin;
2623   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2624   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2629 {
2630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2631   PetscErrorCode ierr;
2632 
2633   PetscFunctionBegin;
2634   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2635   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2636   PetscFunctionReturn(0);
2637 }
2638 
2639 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2640 {
2641   Mat_MPIAIJ     *b;
2642   PetscErrorCode ierr;
2643 
2644   PetscFunctionBegin;
2645   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2646   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2647   b = (Mat_MPIAIJ*)B->data;
2648 
2649 #if defined(PETSC_USE_CTABLE)
2650   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2651 #else
2652   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2653 #endif
2654   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2655   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2656   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2657 
2658   /* Because the B will have been resized we simply destroy it and create a new one each time */
2659   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2660   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2661   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2662   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2663   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2664   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2665 
2666   if (!B->preallocated) {
2667     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2668     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2669     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2670     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2671     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2672   }
2673 
2674   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2675   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2676   B->preallocated  = PETSC_TRUE;
2677   B->was_assembled = PETSC_FALSE;
2678   B->assembled     = PETSC_FALSE;;
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2683 {
2684   Mat            mat;
2685   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   *newmat = 0;
2690   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2691   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2692   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2693   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2694   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2695   a       = (Mat_MPIAIJ*)mat->data;
2696 
2697   mat->factortype   = matin->factortype;
2698   mat->assembled    = PETSC_TRUE;
2699   mat->insertmode   = NOT_SET_VALUES;
2700   mat->preallocated = PETSC_TRUE;
2701 
2702   a->size         = oldmat->size;
2703   a->rank         = oldmat->rank;
2704   a->donotstash   = oldmat->donotstash;
2705   a->roworiented  = oldmat->roworiented;
2706   a->rowindices   = 0;
2707   a->rowvalues    = 0;
2708   a->getrowactive = PETSC_FALSE;
2709 
2710   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2711   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2712 
2713   if (oldmat->colmap) {
2714 #if defined(PETSC_USE_CTABLE)
2715     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2716 #else
2717     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2718     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2719     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2720 #endif
2721   } else a->colmap = 0;
2722   if (oldmat->garray) {
2723     PetscInt len;
2724     len  = oldmat->B->cmap->n;
2725     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2726     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2727     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2728   } else a->garray = 0;
2729 
2730   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2731   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2732   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2733   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2734   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2735   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2736   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2737   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2738   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2739   *newmat = mat;
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2744 {
2745   PetscScalar    *vals,*svals;
2746   MPI_Comm       comm;
2747   PetscErrorCode ierr;
2748   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2749   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2750   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2751   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2752   PetscInt       cend,cstart,n,*rowners;
2753   int            fd;
2754   PetscInt       bs = newMat->rmap->bs;
2755 
2756   PetscFunctionBegin;
2757   /* force binary viewer to load .info file if it has not yet done so */
2758   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2759   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2760   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2761   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2762   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2763   if (!rank) {
2764     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2765     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2766     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2767   }
2768 
2769   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2770   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2771   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2772   if (bs < 0) bs = 1;
2773 
2774   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2775   M    = header[1]; N = header[2];
2776 
2777   /* If global sizes are set, check if they are consistent with that given in the file */
2778   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2779   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2780 
2781   /* determine ownership of all (block) rows */
2782   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2783   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2784   else m = newMat->rmap->n; /* Set by user */
2785 
2786   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2787   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2788 
2789   /* First process needs enough room for process with most rows */
2790   if (!rank) {
2791     mmax = rowners[1];
2792     for (i=2; i<=size; i++) {
2793       mmax = PetscMax(mmax, rowners[i]);
2794     }
2795   } else mmax = -1;             /* unused, but compilers complain */
2796 
2797   rowners[0] = 0;
2798   for (i=2; i<=size; i++) {
2799     rowners[i] += rowners[i-1];
2800   }
2801   rstart = rowners[rank];
2802   rend   = rowners[rank+1];
2803 
2804   /* distribute row lengths to all processors */
2805   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2806   if (!rank) {
2807     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2808     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2809     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2810     for (j=0; j<m; j++) {
2811       procsnz[0] += ourlens[j];
2812     }
2813     for (i=1; i<size; i++) {
2814       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2815       /* calculate the number of nonzeros on each processor */
2816       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2817         procsnz[i] += rowlengths[j];
2818       }
2819       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2820     }
2821     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2822   } else {
2823     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2824   }
2825 
2826   if (!rank) {
2827     /* determine max buffer needed and allocate it */
2828     maxnz = 0;
2829     for (i=0; i<size; i++) {
2830       maxnz = PetscMax(maxnz,procsnz[i]);
2831     }
2832     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2833 
2834     /* read in my part of the matrix column indices  */
2835     nz   = procsnz[0];
2836     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2837     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2838 
2839     /* read in every one elses and ship off */
2840     for (i=1; i<size; i++) {
2841       nz   = procsnz[i];
2842       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2843       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2844     }
2845     ierr = PetscFree(cols);CHKERRQ(ierr);
2846   } else {
2847     /* determine buffer space needed for message */
2848     nz = 0;
2849     for (i=0; i<m; i++) {
2850       nz += ourlens[i];
2851     }
2852     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2853 
2854     /* receive message of column indices*/
2855     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2856   }
2857 
2858   /* determine column ownership if matrix is not square */
2859   if (N != M) {
2860     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2861     else n = newMat->cmap->n;
2862     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2863     cstart = cend - n;
2864   } else {
2865     cstart = rstart;
2866     cend   = rend;
2867     n      = cend - cstart;
2868   }
2869 
2870   /* loop over local rows, determining number of off diagonal entries */
2871   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2872   jj   = 0;
2873   for (i=0; i<m; i++) {
2874     for (j=0; j<ourlens[i]; j++) {
2875       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2876       jj++;
2877     }
2878   }
2879 
2880   for (i=0; i<m; i++) {
2881     ourlens[i] -= offlens[i];
2882   }
2883   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2884 
2885   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2886 
2887   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2888 
2889   for (i=0; i<m; i++) {
2890     ourlens[i] += offlens[i];
2891   }
2892 
2893   if (!rank) {
2894     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2895 
2896     /* read in my part of the matrix numerical values  */
2897     nz   = procsnz[0];
2898     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2899 
2900     /* insert into matrix */
2901     jj      = rstart;
2902     smycols = mycols;
2903     svals   = vals;
2904     for (i=0; i<m; i++) {
2905       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2906       smycols += ourlens[i];
2907       svals   += ourlens[i];
2908       jj++;
2909     }
2910 
2911     /* read in other processors and ship out */
2912     for (i=1; i<size; i++) {
2913       nz   = procsnz[i];
2914       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2915       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2916     }
2917     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2918   } else {
2919     /* receive numeric values */
2920     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2921 
2922     /* receive message of values*/
2923     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2924 
2925     /* insert into matrix */
2926     jj      = rstart;
2927     smycols = mycols;
2928     svals   = vals;
2929     for (i=0; i<m; i++) {
2930       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2931       smycols += ourlens[i];
2932       svals   += ourlens[i];
2933       jj++;
2934     }
2935   }
2936   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2937   ierr = PetscFree(vals);CHKERRQ(ierr);
2938   ierr = PetscFree(mycols);CHKERRQ(ierr);
2939   ierr = PetscFree(rowners);CHKERRQ(ierr);
2940   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2941   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 /* Not scalable because of ISAllGather() unless getting all columns. */
2946 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2947 {
2948   PetscErrorCode ierr;
2949   IS             iscol_local;
2950   PetscBool      isstride;
2951   PetscMPIInt    lisstride=0,gisstride;
2952 
2953   PetscFunctionBegin;
2954   /* check if we are grabbing all columns*/
2955   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2956 
2957   if (isstride) {
2958     PetscInt  start,len,mstart,mlen;
2959     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2960     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2961     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2962     if (mstart == start && mlen-mstart == len) lisstride = 1;
2963   }
2964 
2965   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2966   if (gisstride) {
2967     PetscInt N;
2968     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2969     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2970     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2971     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2972   } else {
2973     PetscInt cbs;
2974     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2975     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2976     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2977   }
2978 
2979   *isseq = iscol_local;
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 /*
2984  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
2985  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2986 
2987  Input Parameters:
2988    mat - matrix
2989    isrow - parallel row index set; its local indices are a subset of local columns of mat,
2990            i.e., mat->rstart <= isrow[i] < mat->rend
2991    iscol - parallel column index set; its local indices are a subset of local columns of mat,
2992            i.e., mat->cstart <= iscol[i] < mat->cend
2993  Output Parameter:
2994    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
2995    iscol_o - sequential column index set for retrieving mat->B
2996    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
2997  */
2998 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
2999 {
3000   PetscErrorCode ierr;
3001   Vec            x,cmap;
3002   const PetscInt *is_idx;
3003   PetscScalar    *xarray,*cmaparray;
3004   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3005   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3006   Mat            B=a->B;
3007   Vec            lvec=a->lvec,lcmap;
3008   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3009   MPI_Comm       comm;
3010 
3011   PetscFunctionBegin;
3012   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3013   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3014 
3015   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3016   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3017   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3018   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3019 
3020   /* Get start indices */
3021   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3022   isstart -= ncols;
3023   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3024 
3025   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3026   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3027   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3028   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3029   for (i=0; i<ncols; i++) {
3030     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3031     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3032     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3033   }
3034   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3035   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3036   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3037 
3038   /* Get iscol_d */
3039   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3040   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3041   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3042 
3043   /* Get isrow_d */
3044   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3045   rstart = mat->rmap->rstart;
3046   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3047   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3048   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3049   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3050 
3051   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3052   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3053   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3054 
3055   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3056   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3057 
3058   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3059 
3060   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3061   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3062   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3063 
3064   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3065   /* off-process column indices */
3066   count = 0;
3067   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3068   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3069 
3070   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3071   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3072   for (i=0; i<Bn; i++) {
3073     if (PetscRealPart(xarray[i]) > -1.0) {
3074       idx[count]     = i;                   /* local column index in off-diagonal part B */
3075       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3076     }
3077   }
3078   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3079   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3080 
3081   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3082   /* cannot ensure iscol_o has same blocksize as iscol! */
3083 
3084   ierr = PetscFree(idx);CHKERRQ(ierr);
3085 
3086   *garray = cmap1;
3087 
3088   ierr = VecDestroy(&x);CHKERRQ(ierr);
3089   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3090   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3095 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3096 {
3097   PetscErrorCode ierr;
3098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3099   Mat            M = NULL;
3100   MPI_Comm       comm;
3101   IS             iscol_d,isrow_d,iscol_o;
3102   Mat            Asub = NULL,Bsub = NULL;
3103   PetscInt       n;
3104 
3105   PetscFunctionBegin;
3106   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3107 
3108   if (call == MAT_REUSE_MATRIX) {
3109     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3110     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3111     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3112 
3113     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3114     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3115 
3116     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3117     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3118 
3119     /* Update diagonal and off-diagonal portions of submat */
3120     asub = (Mat_MPIAIJ*)(*submat)->data;
3121     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3122     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3123     if (n) {
3124       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3125     }
3126     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3127     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3128 
3129   } else { /* call == MAT_INITIAL_MATRIX) */
3130     const PetscInt *garray;
3131     PetscInt        BsubN;
3132 
3133     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3134     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3135 
3136     /* Create local submatrices Asub and Bsub */
3137     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3138     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3139 
3140     /* Create submatrix M */
3141     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3142 
3143     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3144     asub = (Mat_MPIAIJ*)M->data;
3145 
3146     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3147     n = asub->B->cmap->N;
3148     if (BsubN > n) {
3149       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3150       const PetscInt *idx;
3151       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3152       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3153 
3154       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3155       j = 0;
3156       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3157       for (i=0; i<n; i++) {
3158         if (j >= BsubN) break;
3159         while (subgarray[i] > garray[j]) j++;
3160 
3161         if (subgarray[i] == garray[j]) {
3162           idx_new[i] = idx[j++];
3163         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3164       }
3165       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3166 
3167       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3168       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3169 
3170     } else if (BsubN < n) {
3171       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3172     }
3173 
3174     ierr = PetscFree(garray);CHKERRQ(ierr);
3175     *submat = M;
3176 
3177     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3178     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3179     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3180 
3181     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3182     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3183 
3184     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3185     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3186   }
3187   PetscFunctionReturn(0);
3188 }
3189 
3190 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3191 {
3192   PetscErrorCode ierr;
3193   IS             iscol_local,isrow_d;
3194   PetscInt       csize;
3195   PetscInt       n,i,j,start,end;
3196   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3197   MPI_Comm       comm;
3198 
3199   PetscFunctionBegin;
3200   /* If isrow has same processor distribution as mat,
3201      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3202   if (call == MAT_REUSE_MATRIX) {
3203     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3204     if (isrow_d) {
3205       sameRowDist  = PETSC_TRUE;
3206       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3207     } else {
3208       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3209       if (iscol_local) {
3210         sameRowDist  = PETSC_TRUE;
3211         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3212       }
3213     }
3214   } else {
3215     /* Check if isrow has same processor distribution as mat */
3216     sameDist[0] = PETSC_FALSE;
3217     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3218     if (!n) {
3219       sameDist[0] = PETSC_TRUE;
3220     } else {
3221       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3222       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3223       if (i >= start && j < end) {
3224         sameDist[0] = PETSC_TRUE;
3225       }
3226     }
3227 
3228     /* Check if iscol has same processor distribution as mat */
3229     sameDist[1] = PETSC_FALSE;
3230     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3231     if (!n) {
3232       sameDist[1] = PETSC_TRUE;
3233     } else {
3234       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3235       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3236       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3237     }
3238 
3239     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3240     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3241     sameRowDist = tsameDist[0];
3242   }
3243 
3244   if (sameRowDist) {
3245     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3246       /* isrow and iscol have same processor distribution as mat */
3247       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3248     } else { /* sameRowDist */
3249       /* isrow has same processor distribution as mat */
3250       ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3251     }
3252     PetscFunctionReturn(0);
3253   }
3254 
3255   /* General case: iscol -> iscol_local which has global size of iscol */
3256   if (call == MAT_REUSE_MATRIX) {
3257     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3258     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3259   } else {
3260     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3261   }
3262 
3263   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3264   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3265 
3266   if (call == MAT_INITIAL_MATRIX) {
3267     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3268     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3269   }
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 /*@C
3274      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3275          and "off-diagonal" part of the matrix in CSR format.
3276 
3277    Collective on MPI_Comm
3278 
3279    Input Parameters:
3280 +  comm - MPI communicator
3281 .  A - "diagonal" portion of matrix
3282 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3283 -  garray - global index of B columns
3284 
3285    Output Parameter:
3286 .   mat - the matrix, with input A as its local diagonal matrix
3287    Level: advanced
3288 
3289    Notes:
3290        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3291        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3292 
3293 .seealso: MatCreateMPIAIJWithSplitArrays()
3294 @*/
3295 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3296 {
3297   PetscErrorCode ierr;
3298   Mat_MPIAIJ     *maij;
3299   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3300   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3301   PetscScalar    *oa=b->a;
3302   Mat            Bnew;
3303   PetscInt       m,n,N;
3304 
3305   PetscFunctionBegin;
3306   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3307   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3308   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3309   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3310   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3311   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3312 
3313   /* Get global columns of mat */
3314   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3315 
3316   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3317   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3318   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3319   maij = (Mat_MPIAIJ*)(*mat)->data;
3320 
3321   (*mat)->preallocated = PETSC_TRUE;
3322 
3323   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3324   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3325 
3326   /* Set A as diagonal portion of *mat */
3327   maij->A = A;
3328 
3329   nz = oi[m];
3330   for (i=0; i<nz; i++) {
3331     col   = oj[i];
3332     oj[i] = garray[col];
3333   }
3334 
3335    /* Set Bnew as off-diagonal portion of *mat */
3336   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3337   bnew        = (Mat_SeqAIJ*)Bnew->data;
3338   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3339   maij->B     = Bnew;
3340 
3341   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3342 
3343   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3344   b->free_a       = PETSC_FALSE;
3345   b->free_ij      = PETSC_FALSE;
3346   ierr = MatDestroy(&B);CHKERRQ(ierr);
3347 
3348   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3349   bnew->free_a       = PETSC_TRUE;
3350   bnew->free_ij      = PETSC_TRUE;
3351 
3352   /* condense columns of maij->B */
3353   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3354   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3355   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3356   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3357   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3362 
3363 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3364 {
3365   PetscErrorCode ierr;
3366   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3367   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3368   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3369   Mat            M,Msub,B=a->B;
3370   MatScalar      *aa;
3371   Mat_SeqAIJ     *aij;
3372   PetscInt       *garray = a->garray,*colsub,Ncols;
3373   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3374   IS             iscol_sub,iscmap;
3375   const PetscInt *is_idx,*cmap;
3376   PetscBool      allcolumns=PETSC_FALSE;
3377   IS             iscol_local=NULL;
3378   MPI_Comm       comm;
3379 
3380   PetscFunctionBegin;
3381   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3382 
3383   if (call == MAT_REUSE_MATRIX) {
3384     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3385     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3386     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3387 
3388     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3389     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3390 
3391     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3392     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3393 
3394     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3395 
3396   } else { /* call == MAT_INITIAL_MATRIX) */
3397     PetscBool flg;
3398 
3399     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3400     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3401 
3402     /* (1) iscol -> nonscalable iscol_local */
3403     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3404     ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3405     if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != Ncols %d",n,Ncols);
3406 
3407     /* Check for special case: each processor gets entire matrix columns */
3408     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3409     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3410     if (allcolumns) {
3411       iscol_sub = iscol_local;
3412       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3413       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3414 
3415     } else {
3416       /* (2) iscol_local -> iscol_sub and iscmap */
3417       PetscInt *idx,*cmap1,k;
3418 
3419       /* implementation below requires iscol_local be sorted, it can have duplicate indices */
3420       ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr);
3421       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not implemented yet");
3422 
3423       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3424       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3425       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3426       count = 0;
3427       k     = 0;
3428       for (i=0; i<Ncols; i++) {
3429         j = is_idx[i];
3430         if (j >= cstart && j < cend) {
3431           /* diagonal part of mat */
3432           idx[count]     = j;
3433           cmap1[count++] = i; /* column index in submat */
3434         } else if (Bn) {
3435           /* off-diagonal part of mat */
3436           if (j == garray[k]) {
3437             idx[count]     = j;
3438             cmap1[count++] = i;  /* column index in submat */
3439           } else if (j > garray[k]) {
3440             while (j > garray[k] && k < Bn-1) k++;
3441             if (j == garray[k]) {
3442               idx[count]     = j;
3443               cmap1[count++] = i; /* column index in submat */
3444             }
3445           }
3446         }
3447       }
3448       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3449 
3450       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3451       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3452       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3453 
3454       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3455     }
3456 
3457     /* (3) Create sequential Msub */
3458     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3459   }
3460 
3461   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3462   aij  = (Mat_SeqAIJ*)(Msub)->data;
3463   ii   = aij->i;
3464   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3465 
3466   /*
3467       m - number of local rows
3468       Ncols - number of columns (same on all processors)
3469       rstart - first row in new global matrix generated
3470   */
3471   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3472 
3473   if (call == MAT_INITIAL_MATRIX) {
3474     /* (4) Create parallel newmat */
3475     PetscMPIInt    rank,size;
3476     PetscInt       csize;
3477 
3478     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3479     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3480 
3481     /*
3482         Determine the number of non-zeros in the diagonal and off-diagonal
3483         portions of the matrix in order to do correct preallocation
3484     */
3485 
3486     /* first get start and end of "diagonal" columns */
3487     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3488     if (csize == PETSC_DECIDE) {
3489       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3490       if (mglobal == Ncols) { /* square matrix */
3491         nlocal = m;
3492       } else {
3493         nlocal = Ncols/size + ((Ncols % size) > rank);
3494       }
3495     } else {
3496       nlocal = csize;
3497     }
3498     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3499     rstart = rend - nlocal;
3500     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3501 
3502     /* next, compute all the lengths */
3503     jj    = aij->j;
3504     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3505     olens = dlens + m;
3506     for (i=0; i<m; i++) {
3507       jend = ii[i+1] - ii[i];
3508       olen = 0;
3509       dlen = 0;
3510       for (j=0; j<jend; j++) {
3511         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3512         else dlen++;
3513         jj++;
3514       }
3515       olens[i] = olen;
3516       dlens[i] = dlen;
3517     }
3518 
3519     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3520     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3521 
3522     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3523     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3524     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3525     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3526     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3527     ierr = PetscFree(dlens);CHKERRQ(ierr);
3528 
3529   } else { /* call == MAT_REUSE_MATRIX */
3530     M    = *newmat;
3531     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3532     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3533     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3534     /*
3535          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3536        rather than the slower MatSetValues().
3537     */
3538     M->was_assembled = PETSC_TRUE;
3539     M->assembled     = PETSC_FALSE;
3540   }
3541 
3542   /* (5) Set values of Msub to *newmat */
3543   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3544   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3545 
3546   jj   = aij->j;
3547   aa   = aij->a;
3548   for (i=0; i<m; i++) {
3549     row = rstart + i;
3550     nz  = ii[i+1] - ii[i];
3551     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3552     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3553     jj += nz; aa += nz;
3554   }
3555   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3556 
3557   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3558   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559 
3560   ierr = PetscFree(colsub);CHKERRQ(ierr);
3561 
3562   /* save Msub, iscol_sub and iscmap used in processor for next request */
3563   if (call ==  MAT_INITIAL_MATRIX) {
3564     *newmat = M;
3565     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3566     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3567 
3568     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3569     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3570 
3571     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3572     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3573 
3574     if (iscol_local) {
3575       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3576       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3577     }
3578   }
3579   PetscFunctionReturn(0);
3580 }
3581 
3582 /*
3583     Not great since it makes two copies of the submatrix, first an SeqAIJ
3584   in local and then by concatenating the local matrices the end result.
3585   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3586 
3587   Note: This requires a sequential iscol with all indices.
3588 */
3589 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3590 {
3591   PetscErrorCode ierr;
3592   PetscMPIInt    rank,size;
3593   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3594   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3595   Mat            M,Mreuse;
3596   MatScalar      *aa,*vwork;
3597   MPI_Comm       comm;
3598   Mat_SeqAIJ     *aij;
3599   PetscBool      colflag,allcolumns=PETSC_FALSE;
3600 
3601   PetscFunctionBegin;
3602   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3603   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3605 
3606   /* Check for special case: each processor gets entire matrix columns */
3607   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3608   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3609   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3610 
3611   if (call ==  MAT_REUSE_MATRIX) {
3612     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3613     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3614     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3615   } else {
3616     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3617   }
3618 
3619   /*
3620       m - number of local rows
3621       n - number of columns (same on all processors)
3622       rstart - first row in new global matrix generated
3623   */
3624   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3625   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3626   if (call == MAT_INITIAL_MATRIX) {
3627     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3628     ii  = aij->i;
3629     jj  = aij->j;
3630 
3631     /*
3632         Determine the number of non-zeros in the diagonal and off-diagonal
3633         portions of the matrix in order to do correct preallocation
3634     */
3635 
3636     /* first get start and end of "diagonal" columns */
3637     if (csize == PETSC_DECIDE) {
3638       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3639       if (mglobal == n) { /* square matrix */
3640         nlocal = m;
3641       } else {
3642         nlocal = n/size + ((n % size) > rank);
3643       }
3644     } else {
3645       nlocal = csize;
3646     }
3647     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3648     rstart = rend - nlocal;
3649     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3650 
3651     /* next, compute all the lengths */
3652     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3653     olens = dlens + m;
3654     for (i=0; i<m; i++) {
3655       jend = ii[i+1] - ii[i];
3656       olen = 0;
3657       dlen = 0;
3658       for (j=0; j<jend; j++) {
3659         if (*jj < rstart || *jj >= rend) olen++;
3660         else dlen++;
3661         jj++;
3662       }
3663       olens[i] = olen;
3664       dlens[i] = dlen;
3665     }
3666     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3667     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3668     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3669     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3670     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3671     ierr = PetscFree(dlens);CHKERRQ(ierr);
3672   } else {
3673     PetscInt ml,nl;
3674 
3675     M    = *newmat;
3676     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3677     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3678     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3679     /*
3680          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3681        rather than the slower MatSetValues().
3682     */
3683     M->was_assembled = PETSC_TRUE;
3684     M->assembled     = PETSC_FALSE;
3685   }
3686   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3687   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3688   ii   = aij->i;
3689   jj   = aij->j;
3690   aa   = aij->a;
3691   for (i=0; i<m; i++) {
3692     row   = rstart + i;
3693     nz    = ii[i+1] - ii[i];
3694     cwork = jj;     jj += nz;
3695     vwork = aa;     aa += nz;
3696     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3697   }
3698 
3699   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3700   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701   *newmat = M;
3702 
3703   /* save submatrix used in processor for next request */
3704   if (call ==  MAT_INITIAL_MATRIX) {
3705     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3706     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3707   }
3708   PetscFunctionReturn(0);
3709 }
3710 
3711 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3712 {
3713   PetscInt       m,cstart, cend,j,nnz,i,d;
3714   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3715   const PetscInt *JJ;
3716   PetscScalar    *values;
3717   PetscErrorCode ierr;
3718   PetscBool      nooffprocentries;
3719 
3720   PetscFunctionBegin;
3721   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3722 
3723   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3724   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3725   m      = B->rmap->n;
3726   cstart = B->cmap->rstart;
3727   cend   = B->cmap->rend;
3728   rstart = B->rmap->rstart;
3729 
3730   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3731 
3732 #if defined(PETSC_USE_DEBUGGING)
3733   for (i=0; i<m; i++) {
3734     nnz = Ii[i+1]- Ii[i];
3735     JJ  = J + Ii[i];
3736     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3737     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3738     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3739   }
3740 #endif
3741 
3742   for (i=0; i<m; i++) {
3743     nnz     = Ii[i+1]- Ii[i];
3744     JJ      = J + Ii[i];
3745     nnz_max = PetscMax(nnz_max,nnz);
3746     d       = 0;
3747     for (j=0; j<nnz; j++) {
3748       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3749     }
3750     d_nnz[i] = d;
3751     o_nnz[i] = nnz - d;
3752   }
3753   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3754   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3755 
3756   if (v) values = (PetscScalar*)v;
3757   else {
3758     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3759   }
3760 
3761   for (i=0; i<m; i++) {
3762     ii   = i + rstart;
3763     nnz  = Ii[i+1]- Ii[i];
3764     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3765   }
3766   nooffprocentries    = B->nooffprocentries;
3767   B->nooffprocentries = PETSC_TRUE;
3768   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3769   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3770   B->nooffprocentries = nooffprocentries;
3771 
3772   if (!v) {
3773     ierr = PetscFree(values);CHKERRQ(ierr);
3774   }
3775   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 /*@
3780    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3781    (the default parallel PETSc format).
3782 
3783    Collective on MPI_Comm
3784 
3785    Input Parameters:
3786 +  B - the matrix
3787 .  i - the indices into j for the start of each local row (starts with zero)
3788 .  j - the column indices for each local row (starts with zero)
3789 -  v - optional values in the matrix
3790 
3791    Level: developer
3792 
3793    Notes:
3794        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3795      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3796      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3797 
3798        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3799 
3800        The format which is used for the sparse matrix input, is equivalent to a
3801     row-major ordering.. i.e for the following matrix, the input data expected is
3802     as shown
3803 
3804 $        1 0 0
3805 $        2 0 3     P0
3806 $       -------
3807 $        4 5 6     P1
3808 $
3809 $     Process0 [P0]: rows_owned=[0,1]
3810 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3811 $        j =  {0,0,2}  [size = 3]
3812 $        v =  {1,2,3}  [size = 3]
3813 $
3814 $     Process1 [P1]: rows_owned=[2]
3815 $        i =  {0,3}    [size = nrow+1  = 1+1]
3816 $        j =  {0,1,2}  [size = 3]
3817 $        v =  {4,5,6}  [size = 3]
3818 
3819 .keywords: matrix, aij, compressed row, sparse, parallel
3820 
3821 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3822           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3823 @*/
3824 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3825 {
3826   PetscErrorCode ierr;
3827 
3828   PetscFunctionBegin;
3829   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 /*@C
3834    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3835    (the default parallel PETSc format).  For good matrix assembly performance
3836    the user should preallocate the matrix storage by setting the parameters
3837    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3838    performance can be increased by more than a factor of 50.
3839 
3840    Collective on MPI_Comm
3841 
3842    Input Parameters:
3843 +  B - the matrix
3844 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3845            (same value is used for all local rows)
3846 .  d_nnz - array containing the number of nonzeros in the various rows of the
3847            DIAGONAL portion of the local submatrix (possibly different for each row)
3848            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3849            The size of this array is equal to the number of local rows, i.e 'm'.
3850            For matrices that will be factored, you must leave room for (and set)
3851            the diagonal entry even if it is zero.
3852 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3853            submatrix (same value is used for all local rows).
3854 -  o_nnz - array containing the number of nonzeros in the various rows of the
3855            OFF-DIAGONAL portion of the local submatrix (possibly different for
3856            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3857            structure. The size of this array is equal to the number
3858            of local rows, i.e 'm'.
3859 
3860    If the *_nnz parameter is given then the *_nz parameter is ignored
3861 
3862    The AIJ format (also called the Yale sparse matrix format or
3863    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3864    storage.  The stored row and column indices begin with zero.
3865    See Users-Manual: ch_mat for details.
3866 
3867    The parallel matrix is partitioned such that the first m0 rows belong to
3868    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3869    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3870 
3871    The DIAGONAL portion of the local submatrix of a processor can be defined
3872    as the submatrix which is obtained by extraction the part corresponding to
3873    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3874    first row that belongs to the processor, r2 is the last row belonging to
3875    the this processor, and c1-c2 is range of indices of the local part of a
3876    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3877    common case of a square matrix, the row and column ranges are the same and
3878    the DIAGONAL part is also square. The remaining portion of the local
3879    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3880 
3881    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3882 
3883    You can call MatGetInfo() to get information on how effective the preallocation was;
3884    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3885    You can also run with the option -info and look for messages with the string
3886    malloc in them to see if additional memory allocation was needed.
3887 
3888    Example usage:
3889 
3890    Consider the following 8x8 matrix with 34 non-zero values, that is
3891    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3892    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3893    as follows:
3894 
3895 .vb
3896             1  2  0  |  0  3  0  |  0  4
3897     Proc0   0  5  6  |  7  0  0  |  8  0
3898             9  0 10  | 11  0  0  | 12  0
3899     -------------------------------------
3900            13  0 14  | 15 16 17  |  0  0
3901     Proc1   0 18  0  | 19 20 21  |  0  0
3902             0  0  0  | 22 23  0  | 24  0
3903     -------------------------------------
3904     Proc2  25 26 27  |  0  0 28  | 29  0
3905            30  0  0  | 31 32 33  |  0 34
3906 .ve
3907 
3908    This can be represented as a collection of submatrices as:
3909 
3910 .vb
3911       A B C
3912       D E F
3913       G H I
3914 .ve
3915 
3916    Where the submatrices A,B,C are owned by proc0, D,E,F are
3917    owned by proc1, G,H,I are owned by proc2.
3918 
3919    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3920    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3921    The 'M','N' parameters are 8,8, and have the same values on all procs.
3922 
3923    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3924    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3925    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3926    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3927    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3928    matrix, ans [DF] as another SeqAIJ matrix.
3929 
3930    When d_nz, o_nz parameters are specified, d_nz storage elements are
3931    allocated for every row of the local diagonal submatrix, and o_nz
3932    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3933    One way to choose d_nz and o_nz is to use the max nonzerors per local
3934    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3935    In this case, the values of d_nz,o_nz are:
3936 .vb
3937      proc0 : dnz = 2, o_nz = 2
3938      proc1 : dnz = 3, o_nz = 2
3939      proc2 : dnz = 1, o_nz = 4
3940 .ve
3941    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3942    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3943    for proc3. i.e we are using 12+15+10=37 storage locations to store
3944    34 values.
3945 
3946    When d_nnz, o_nnz parameters are specified, the storage is specified
3947    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3948    In the above case the values for d_nnz,o_nnz are:
3949 .vb
3950      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3951      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3952      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3953 .ve
3954    Here the space allocated is sum of all the above values i.e 34, and
3955    hence pre-allocation is perfect.
3956 
3957    Level: intermediate
3958 
3959 .keywords: matrix, aij, compressed row, sparse, parallel
3960 
3961 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3962           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3963 @*/
3964 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3965 {
3966   PetscErrorCode ierr;
3967 
3968   PetscFunctionBegin;
3969   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3970   PetscValidType(B,1);
3971   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3972   PetscFunctionReturn(0);
3973 }
3974 
3975 /*@
3976      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3977          CSR format the local rows.
3978 
3979    Collective on MPI_Comm
3980 
3981    Input Parameters:
3982 +  comm - MPI communicator
3983 .  m - number of local rows (Cannot be PETSC_DECIDE)
3984 .  n - This value should be the same as the local size used in creating the
3985        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3986        calculated if N is given) For square matrices n is almost always m.
3987 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3988 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3989 .   i - row indices
3990 .   j - column indices
3991 -   a - matrix values
3992 
3993    Output Parameter:
3994 .   mat - the matrix
3995 
3996    Level: intermediate
3997 
3998    Notes:
3999        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4000      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4001      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4002 
4003        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4004 
4005        The format which is used for the sparse matrix input, is equivalent to a
4006     row-major ordering.. i.e for the following matrix, the input data expected is
4007     as shown
4008 
4009 $        1 0 0
4010 $        2 0 3     P0
4011 $       -------
4012 $        4 5 6     P1
4013 $
4014 $     Process0 [P0]: rows_owned=[0,1]
4015 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4016 $        j =  {0,0,2}  [size = 3]
4017 $        v =  {1,2,3}  [size = 3]
4018 $
4019 $     Process1 [P1]: rows_owned=[2]
4020 $        i =  {0,3}    [size = nrow+1  = 1+1]
4021 $        j =  {0,1,2}  [size = 3]
4022 $        v =  {4,5,6}  [size = 3]
4023 
4024 .keywords: matrix, aij, compressed row, sparse, parallel
4025 
4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4027           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4028 @*/
4029 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4030 {
4031   PetscErrorCode ierr;
4032 
4033   PetscFunctionBegin;
4034   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4035   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4036   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4037   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4038   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4039   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4040   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 /*@C
4045    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4046    (the default parallel PETSc format).  For good matrix assembly performance
4047    the user should preallocate the matrix storage by setting the parameters
4048    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4049    performance can be increased by more than a factor of 50.
4050 
4051    Collective on MPI_Comm
4052 
4053    Input Parameters:
4054 +  comm - MPI communicator
4055 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4056            This value should be the same as the local size used in creating the
4057            y vector for the matrix-vector product y = Ax.
4058 .  n - This value should be the same as the local size used in creating the
4059        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4060        calculated if N is given) For square matrices n is almost always m.
4061 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4062 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4063 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4064            (same value is used for all local rows)
4065 .  d_nnz - array containing the number of nonzeros in the various rows of the
4066            DIAGONAL portion of the local submatrix (possibly different for each row)
4067            or NULL, if d_nz is used to specify the nonzero structure.
4068            The size of this array is equal to the number of local rows, i.e 'm'.
4069 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4070            submatrix (same value is used for all local rows).
4071 -  o_nnz - array containing the number of nonzeros in the various rows of the
4072            OFF-DIAGONAL portion of the local submatrix (possibly different for
4073            each row) or NULL, if o_nz is used to specify the nonzero
4074            structure. The size of this array is equal to the number
4075            of local rows, i.e 'm'.
4076 
4077    Output Parameter:
4078 .  A - the matrix
4079 
4080    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4081    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4082    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4083 
4084    Notes:
4085    If the *_nnz parameter is given then the *_nz parameter is ignored
4086 
4087    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4088    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4089    storage requirements for this matrix.
4090 
4091    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4092    processor than it must be used on all processors that share the object for
4093    that argument.
4094 
4095    The user MUST specify either the local or global matrix dimensions
4096    (possibly both).
4097 
4098    The parallel matrix is partitioned across processors such that the
4099    first m0 rows belong to process 0, the next m1 rows belong to
4100    process 1, the next m2 rows belong to process 2 etc.. where
4101    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4102    values corresponding to [m x N] submatrix.
4103 
4104    The columns are logically partitioned with the n0 columns belonging
4105    to 0th partition, the next n1 columns belonging to the next
4106    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4107 
4108    The DIAGONAL portion of the local submatrix on any given processor
4109    is the submatrix corresponding to the rows and columns m,n
4110    corresponding to the given processor. i.e diagonal matrix on
4111    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4112    etc. The remaining portion of the local submatrix [m x (N-n)]
4113    constitute the OFF-DIAGONAL portion. The example below better
4114    illustrates this concept.
4115 
4116    For a square global matrix we define each processor's diagonal portion
4117    to be its local rows and the corresponding columns (a square submatrix);
4118    each processor's off-diagonal portion encompasses the remainder of the
4119    local matrix (a rectangular submatrix).
4120 
4121    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4122 
4123    When calling this routine with a single process communicator, a matrix of
4124    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4125    type of communicator, use the construction mechanism
4126 .vb
4127      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4128 .ve
4129 
4130    By default, this format uses inodes (identical nodes) when possible.
4131    We search for consecutive rows with the same nonzero structure, thereby
4132    reusing matrix information to achieve increased efficiency.
4133 
4134    Options Database Keys:
4135 +  -mat_no_inode  - Do not use inodes
4136 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4137 -  -mat_aij_oneindex - Internally use indexing starting at 1
4138         rather than 0.  Note that when calling MatSetValues(),
4139         the user still MUST index entries starting at 0!
4140 
4141 
4142    Example usage:
4143 
4144    Consider the following 8x8 matrix with 34 non-zero values, that is
4145    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4146    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4147    as follows
4148 
4149 .vb
4150             1  2  0  |  0  3  0  |  0  4
4151     Proc0   0  5  6  |  7  0  0  |  8  0
4152             9  0 10  | 11  0  0  | 12  0
4153     -------------------------------------
4154            13  0 14  | 15 16 17  |  0  0
4155     Proc1   0 18  0  | 19 20 21  |  0  0
4156             0  0  0  | 22 23  0  | 24  0
4157     -------------------------------------
4158     Proc2  25 26 27  |  0  0 28  | 29  0
4159            30  0  0  | 31 32 33  |  0 34
4160 .ve
4161 
4162    This can be represented as a collection of submatrices as
4163 
4164 .vb
4165       A B C
4166       D E F
4167       G H I
4168 .ve
4169 
4170    Where the submatrices A,B,C are owned by proc0, D,E,F are
4171    owned by proc1, G,H,I are owned by proc2.
4172 
4173    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4174    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4175    The 'M','N' parameters are 8,8, and have the same values on all procs.
4176 
4177    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4178    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4179    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4180    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4181    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4182    matrix, ans [DF] as another SeqAIJ matrix.
4183 
4184    When d_nz, o_nz parameters are specified, d_nz storage elements are
4185    allocated for every row of the local diagonal submatrix, and o_nz
4186    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4187    One way to choose d_nz and o_nz is to use the max nonzerors per local
4188    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4189    In this case, the values of d_nz,o_nz are
4190 .vb
4191      proc0 : dnz = 2, o_nz = 2
4192      proc1 : dnz = 3, o_nz = 2
4193      proc2 : dnz = 1, o_nz = 4
4194 .ve
4195    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4196    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4197    for proc3. i.e we are using 12+15+10=37 storage locations to store
4198    34 values.
4199 
4200    When d_nnz, o_nnz parameters are specified, the storage is specified
4201    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4202    In the above case the values for d_nnz,o_nnz are
4203 .vb
4204      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4205      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4206      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4207 .ve
4208    Here the space allocated is sum of all the above values i.e 34, and
4209    hence pre-allocation is perfect.
4210 
4211    Level: intermediate
4212 
4213 .keywords: matrix, aij, compressed row, sparse, parallel
4214 
4215 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4216           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4217 @*/
4218 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4219 {
4220   PetscErrorCode ierr;
4221   PetscMPIInt    size;
4222 
4223   PetscFunctionBegin;
4224   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4225   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4227   if (size > 1) {
4228     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4229     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4230   } else {
4231     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4232     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4233   }
4234   PetscFunctionReturn(0);
4235 }
4236 
4237 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4238 {
4239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4240   PetscBool      flg;
4241   PetscErrorCode ierr;
4242 
4243   PetscFunctionBegin;
4244   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4245   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4246   if (Ad)     *Ad     = a->A;
4247   if (Ao)     *Ao     = a->B;
4248   if (colmap) *colmap = a->garray;
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4253 {
4254   PetscErrorCode ierr;
4255   PetscInt       m,N,i,rstart,nnz,Ii;
4256   PetscInt       *indx;
4257   PetscScalar    *values;
4258 
4259   PetscFunctionBegin;
4260   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4261   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4262     PetscInt       *dnz,*onz,sum,bs,cbs;
4263 
4264     if (n == PETSC_DECIDE) {
4265       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4266     }
4267     /* Check sum(n) = N */
4268     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4269     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4270 
4271     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4272     rstart -= m;
4273 
4274     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4275     for (i=0; i<m; i++) {
4276       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4277       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4278       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4279     }
4280 
4281     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4282     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4283     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4284     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4285     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4286     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4287     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4288     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4289   }
4290 
4291   /* numeric phase */
4292   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4293   for (i=0; i<m; i++) {
4294     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4295     Ii   = i + rstart;
4296     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4297     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4298   }
4299   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4300   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4301   PetscFunctionReturn(0);
4302 }
4303 
4304 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4305 {
4306   PetscErrorCode    ierr;
4307   PetscMPIInt       rank;
4308   PetscInt          m,N,i,rstart,nnz;
4309   size_t            len;
4310   const PetscInt    *indx;
4311   PetscViewer       out;
4312   char              *name;
4313   Mat               B;
4314   const PetscScalar *values;
4315 
4316   PetscFunctionBegin;
4317   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4318   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4319   /* Should this be the type of the diagonal block of A? */
4320   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4321   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4322   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4323   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4324   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4325   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4326   for (i=0; i<m; i++) {
4327     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4328     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4329     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4330   }
4331   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4332   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4333 
4334   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4335   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4336   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4337   sprintf(name,"%s.%d",outfile,rank);
4338   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4339   ierr = PetscFree(name);CHKERRQ(ierr);
4340   ierr = MatView(B,out);CHKERRQ(ierr);
4341   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4342   ierr = MatDestroy(&B);CHKERRQ(ierr);
4343   PetscFunctionReturn(0);
4344 }
4345 
4346 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4347 {
4348   PetscErrorCode      ierr;
4349   Mat_Merge_SeqsToMPI *merge;
4350   PetscContainer      container;
4351 
4352   PetscFunctionBegin;
4353   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4354   if (container) {
4355     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4356     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4357     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4358     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4359     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4360     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4361     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4362     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4363     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4364     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4365     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4366     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4367     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4368     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4369     ierr = PetscFree(merge);CHKERRQ(ierr);
4370     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4371   }
4372   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4373   PetscFunctionReturn(0);
4374 }
4375 
4376 #include <../src/mat/utils/freespace.h>
4377 #include <petscbt.h>
4378 
4379 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4380 {
4381   PetscErrorCode      ierr;
4382   MPI_Comm            comm;
4383   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4384   PetscMPIInt         size,rank,taga,*len_s;
4385   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4386   PetscInt            proc,m;
4387   PetscInt            **buf_ri,**buf_rj;
4388   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4389   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4390   MPI_Request         *s_waits,*r_waits;
4391   MPI_Status          *status;
4392   MatScalar           *aa=a->a;
4393   MatScalar           **abuf_r,*ba_i;
4394   Mat_Merge_SeqsToMPI *merge;
4395   PetscContainer      container;
4396 
4397   PetscFunctionBegin;
4398   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4399   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4400 
4401   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4402   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4403 
4404   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4405   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4406 
4407   bi     = merge->bi;
4408   bj     = merge->bj;
4409   buf_ri = merge->buf_ri;
4410   buf_rj = merge->buf_rj;
4411 
4412   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4413   owners = merge->rowmap->range;
4414   len_s  = merge->len_s;
4415 
4416   /* send and recv matrix values */
4417   /*-----------------------------*/
4418   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4419   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4420 
4421   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4422   for (proc=0,k=0; proc<size; proc++) {
4423     if (!len_s[proc]) continue;
4424     i    = owners[proc];
4425     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4426     k++;
4427   }
4428 
4429   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4430   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4431   ierr = PetscFree(status);CHKERRQ(ierr);
4432 
4433   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4434   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4435 
4436   /* insert mat values of mpimat */
4437   /*----------------------------*/
4438   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4439   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4440 
4441   for (k=0; k<merge->nrecv; k++) {
4442     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4443     nrows       = *(buf_ri_k[k]);
4444     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4445     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4446   }
4447 
4448   /* set values of ba */
4449   m = merge->rowmap->n;
4450   for (i=0; i<m; i++) {
4451     arow = owners[rank] + i;
4452     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4453     bnzi = bi[i+1] - bi[i];
4454     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4455 
4456     /* add local non-zero vals of this proc's seqmat into ba */
4457     anzi   = ai[arow+1] - ai[arow];
4458     aj     = a->j + ai[arow];
4459     aa     = a->a + ai[arow];
4460     nextaj = 0;
4461     for (j=0; nextaj<anzi; j++) {
4462       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4463         ba_i[j] += aa[nextaj++];
4464       }
4465     }
4466 
4467     /* add received vals into ba */
4468     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4469       /* i-th row */
4470       if (i == *nextrow[k]) {
4471         anzi   = *(nextai[k]+1) - *nextai[k];
4472         aj     = buf_rj[k] + *(nextai[k]);
4473         aa     = abuf_r[k] + *(nextai[k]);
4474         nextaj = 0;
4475         for (j=0; nextaj<anzi; j++) {
4476           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4477             ba_i[j] += aa[nextaj++];
4478           }
4479         }
4480         nextrow[k]++; nextai[k]++;
4481       }
4482     }
4483     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4484   }
4485   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4486   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4487 
4488   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4489   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4490   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4491   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4492   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
4496 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4497 {
4498   PetscErrorCode      ierr;
4499   Mat                 B_mpi;
4500   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4501   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4502   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4503   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4504   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4505   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4506   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4507   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4508   MPI_Status          *status;
4509   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4510   PetscBT             lnkbt;
4511   Mat_Merge_SeqsToMPI *merge;
4512   PetscContainer      container;
4513 
4514   PetscFunctionBegin;
4515   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4516 
4517   /* make sure it is a PETSc comm */
4518   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4520   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4521 
4522   ierr = PetscNew(&merge);CHKERRQ(ierr);
4523   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4524 
4525   /* determine row ownership */
4526   /*---------------------------------------------------------*/
4527   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4528   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4529   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4530   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4531   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4532   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4533   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4534 
4535   m      = merge->rowmap->n;
4536   owners = merge->rowmap->range;
4537 
4538   /* determine the number of messages to send, their lengths */
4539   /*---------------------------------------------------------*/
4540   len_s = merge->len_s;
4541 
4542   len          = 0; /* length of buf_si[] */
4543   merge->nsend = 0;
4544   for (proc=0; proc<size; proc++) {
4545     len_si[proc] = 0;
4546     if (proc == rank) {
4547       len_s[proc] = 0;
4548     } else {
4549       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4550       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4551     }
4552     if (len_s[proc]) {
4553       merge->nsend++;
4554       nrows = 0;
4555       for (i=owners[proc]; i<owners[proc+1]; i++) {
4556         if (ai[i+1] > ai[i]) nrows++;
4557       }
4558       len_si[proc] = 2*(nrows+1);
4559       len         += len_si[proc];
4560     }
4561   }
4562 
4563   /* determine the number and length of messages to receive for ij-structure */
4564   /*-------------------------------------------------------------------------*/
4565   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4566   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4567 
4568   /* post the Irecv of j-structure */
4569   /*-------------------------------*/
4570   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4571   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4572 
4573   /* post the Isend of j-structure */
4574   /*--------------------------------*/
4575   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4576 
4577   for (proc=0, k=0; proc<size; proc++) {
4578     if (!len_s[proc]) continue;
4579     i    = owners[proc];
4580     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4581     k++;
4582   }
4583 
4584   /* receives and sends of j-structure are complete */
4585   /*------------------------------------------------*/
4586   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4587   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4588 
4589   /* send and recv i-structure */
4590   /*---------------------------*/
4591   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4592   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4593 
4594   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4595   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4596   for (proc=0,k=0; proc<size; proc++) {
4597     if (!len_s[proc]) continue;
4598     /* form outgoing message for i-structure:
4599          buf_si[0]:                 nrows to be sent
4600                [1:nrows]:           row index (global)
4601                [nrows+1:2*nrows+1]: i-structure index
4602     */
4603     /*-------------------------------------------*/
4604     nrows       = len_si[proc]/2 - 1;
4605     buf_si_i    = buf_si + nrows+1;
4606     buf_si[0]   = nrows;
4607     buf_si_i[0] = 0;
4608     nrows       = 0;
4609     for (i=owners[proc]; i<owners[proc+1]; i++) {
4610       anzi = ai[i+1] - ai[i];
4611       if (anzi) {
4612         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4613         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4614         nrows++;
4615       }
4616     }
4617     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4618     k++;
4619     buf_si += len_si[proc];
4620   }
4621 
4622   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4623   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4624 
4625   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4626   for (i=0; i<merge->nrecv; i++) {
4627     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4628   }
4629 
4630   ierr = PetscFree(len_si);CHKERRQ(ierr);
4631   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4632   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4633   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4634   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4635   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4636   ierr = PetscFree(status);CHKERRQ(ierr);
4637 
4638   /* compute a local seq matrix in each processor */
4639   /*----------------------------------------------*/
4640   /* allocate bi array and free space for accumulating nonzero column info */
4641   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4642   bi[0] = 0;
4643 
4644   /* create and initialize a linked list */
4645   nlnk = N+1;
4646   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4647 
4648   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4649   len  = ai[owners[rank+1]] - ai[owners[rank]];
4650   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4651 
4652   current_space = free_space;
4653 
4654   /* determine symbolic info for each local row */
4655   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4656 
4657   for (k=0; k<merge->nrecv; k++) {
4658     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4659     nrows       = *buf_ri_k[k];
4660     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4661     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4662   }
4663 
4664   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4665   len  = 0;
4666   for (i=0; i<m; i++) {
4667     bnzi = 0;
4668     /* add local non-zero cols of this proc's seqmat into lnk */
4669     arow  = owners[rank] + i;
4670     anzi  = ai[arow+1] - ai[arow];
4671     aj    = a->j + ai[arow];
4672     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4673     bnzi += nlnk;
4674     /* add received col data into lnk */
4675     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4676       if (i == *nextrow[k]) { /* i-th row */
4677         anzi  = *(nextai[k]+1) - *nextai[k];
4678         aj    = buf_rj[k] + *nextai[k];
4679         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4680         bnzi += nlnk;
4681         nextrow[k]++; nextai[k]++;
4682       }
4683     }
4684     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4685 
4686     /* if free space is not available, make more free space */
4687     if (current_space->local_remaining<bnzi) {
4688       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4689       nspacedouble++;
4690     }
4691     /* copy data into free space, then initialize lnk */
4692     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4693     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4694 
4695     current_space->array           += bnzi;
4696     current_space->local_used      += bnzi;
4697     current_space->local_remaining -= bnzi;
4698 
4699     bi[i+1] = bi[i] + bnzi;
4700   }
4701 
4702   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4703 
4704   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4705   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4706   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4707 
4708   /* create symbolic parallel matrix B_mpi */
4709   /*---------------------------------------*/
4710   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4711   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4712   if (n==PETSC_DECIDE) {
4713     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4714   } else {
4715     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4716   }
4717   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4718   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4719   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4720   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4721   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4722 
4723   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4724   B_mpi->assembled    = PETSC_FALSE;
4725   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4726   merge->bi           = bi;
4727   merge->bj           = bj;
4728   merge->buf_ri       = buf_ri;
4729   merge->buf_rj       = buf_rj;
4730   merge->coi          = NULL;
4731   merge->coj          = NULL;
4732   merge->owners_co    = NULL;
4733 
4734   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4735 
4736   /* attach the supporting struct to B_mpi for reuse */
4737   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4738   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4739   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4740   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4741   *mpimat = B_mpi;
4742 
4743   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4744   PetscFunctionReturn(0);
4745 }
4746 
4747 /*@C
4748       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4749                  matrices from each processor
4750 
4751     Collective on MPI_Comm
4752 
4753    Input Parameters:
4754 +    comm - the communicators the parallel matrix will live on
4755 .    seqmat - the input sequential matrices
4756 .    m - number of local rows (or PETSC_DECIDE)
4757 .    n - number of local columns (or PETSC_DECIDE)
4758 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4759 
4760    Output Parameter:
4761 .    mpimat - the parallel matrix generated
4762 
4763     Level: advanced
4764 
4765    Notes:
4766      The dimensions of the sequential matrix in each processor MUST be the same.
4767      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4768      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4769 @*/
4770 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4771 {
4772   PetscErrorCode ierr;
4773   PetscMPIInt    size;
4774 
4775   PetscFunctionBegin;
4776   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4777   if (size == 1) {
4778     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4779     if (scall == MAT_INITIAL_MATRIX) {
4780       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4781     } else {
4782       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4783     }
4784     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4785     PetscFunctionReturn(0);
4786   }
4787   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4788   if (scall == MAT_INITIAL_MATRIX) {
4789     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4790   }
4791   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4792   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4793   PetscFunctionReturn(0);
4794 }
4795 
4796 /*@
4797      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4798           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4799           with MatGetSize()
4800 
4801     Not Collective
4802 
4803    Input Parameters:
4804 +    A - the matrix
4805 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4806 
4807    Output Parameter:
4808 .    A_loc - the local sequential matrix generated
4809 
4810     Level: developer
4811 
4812 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4813 
4814 @*/
4815 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4816 {
4817   PetscErrorCode ierr;
4818   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4819   Mat_SeqAIJ     *mat,*a,*b;
4820   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4821   MatScalar      *aa,*ba,*cam;
4822   PetscScalar    *ca;
4823   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4824   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4825   PetscBool      match;
4826   MPI_Comm       comm;
4827   PetscMPIInt    size;
4828 
4829   PetscFunctionBegin;
4830   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4831   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4832   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4833   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4834   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4835 
4836   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4837   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4838   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4839   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4840   aa = a->a; ba = b->a;
4841   if (scall == MAT_INITIAL_MATRIX) {
4842     if (size == 1) {
4843       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4844       PetscFunctionReturn(0);
4845     }
4846 
4847     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4848     ci[0] = 0;
4849     for (i=0; i<am; i++) {
4850       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4851     }
4852     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4853     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4854     k    = 0;
4855     for (i=0; i<am; i++) {
4856       ncols_o = bi[i+1] - bi[i];
4857       ncols_d = ai[i+1] - ai[i];
4858       /* off-diagonal portion of A */
4859       for (jo=0; jo<ncols_o; jo++) {
4860         col = cmap[*bj];
4861         if (col >= cstart) break;
4862         cj[k]   = col; bj++;
4863         ca[k++] = *ba++;
4864       }
4865       /* diagonal portion of A */
4866       for (j=0; j<ncols_d; j++) {
4867         cj[k]   = cstart + *aj++;
4868         ca[k++] = *aa++;
4869       }
4870       /* off-diagonal portion of A */
4871       for (j=jo; j<ncols_o; j++) {
4872         cj[k]   = cmap[*bj++];
4873         ca[k++] = *ba++;
4874       }
4875     }
4876     /* put together the new matrix */
4877     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4878     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4879     /* Since these are PETSc arrays, change flags to free them as necessary. */
4880     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4881     mat->free_a  = PETSC_TRUE;
4882     mat->free_ij = PETSC_TRUE;
4883     mat->nonew   = 0;
4884   } else if (scall == MAT_REUSE_MATRIX) {
4885     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4886     ci = mat->i; cj = mat->j; cam = mat->a;
4887     for (i=0; i<am; i++) {
4888       /* off-diagonal portion of A */
4889       ncols_o = bi[i+1] - bi[i];
4890       for (jo=0; jo<ncols_o; jo++) {
4891         col = cmap[*bj];
4892         if (col >= cstart) break;
4893         *cam++ = *ba++; bj++;
4894       }
4895       /* diagonal portion of A */
4896       ncols_d = ai[i+1] - ai[i];
4897       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4898       /* off-diagonal portion of A */
4899       for (j=jo; j<ncols_o; j++) {
4900         *cam++ = *ba++; bj++;
4901       }
4902     }
4903   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4904   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4905   PetscFunctionReturn(0);
4906 }
4907 
4908 /*@C
4909      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4910 
4911     Not Collective
4912 
4913    Input Parameters:
4914 +    A - the matrix
4915 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4916 -    row, col - index sets of rows and columns to extract (or NULL)
4917 
4918    Output Parameter:
4919 .    A_loc - the local sequential matrix generated
4920 
4921     Level: developer
4922 
4923 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4924 
4925 @*/
4926 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4927 {
4928   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4929   PetscErrorCode ierr;
4930   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4931   IS             isrowa,iscola;
4932   Mat            *aloc;
4933   PetscBool      match;
4934 
4935   PetscFunctionBegin;
4936   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4937   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4938   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4939   if (!row) {
4940     start = A->rmap->rstart; end = A->rmap->rend;
4941     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4942   } else {
4943     isrowa = *row;
4944   }
4945   if (!col) {
4946     start = A->cmap->rstart;
4947     cmap  = a->garray;
4948     nzA   = a->A->cmap->n;
4949     nzB   = a->B->cmap->n;
4950     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4951     ncols = 0;
4952     for (i=0; i<nzB; i++) {
4953       if (cmap[i] < start) idx[ncols++] = cmap[i];
4954       else break;
4955     }
4956     imark = i;
4957     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4958     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4959     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4960   } else {
4961     iscola = *col;
4962   }
4963   if (scall != MAT_INITIAL_MATRIX) {
4964     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4965     aloc[0] = *A_loc;
4966   }
4967   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4968   *A_loc = aloc[0];
4969   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4970   if (!row) {
4971     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4972   }
4973   if (!col) {
4974     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4975   }
4976   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4977   PetscFunctionReturn(0);
4978 }
4979 
4980 /*@C
4981     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4982 
4983     Collective on Mat
4984 
4985    Input Parameters:
4986 +    A,B - the matrices in mpiaij format
4987 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4988 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4989 
4990    Output Parameter:
4991 +    rowb, colb - index sets of rows and columns of B to extract
4992 -    B_seq - the sequential matrix generated
4993 
4994     Level: developer
4995 
4996 @*/
4997 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4998 {
4999   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5000   PetscErrorCode ierr;
5001   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5002   IS             isrowb,iscolb;
5003   Mat            *bseq=NULL;
5004 
5005   PetscFunctionBegin;
5006   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5007     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5008   }
5009   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5010 
5011   if (scall == MAT_INITIAL_MATRIX) {
5012     start = A->cmap->rstart;
5013     cmap  = a->garray;
5014     nzA   = a->A->cmap->n;
5015     nzB   = a->B->cmap->n;
5016     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5017     ncols = 0;
5018     for (i=0; i<nzB; i++) {  /* row < local row index */
5019       if (cmap[i] < start) idx[ncols++] = cmap[i];
5020       else break;
5021     }
5022     imark = i;
5023     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5024     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5025     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5026     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5027   } else {
5028     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5029     isrowb  = *rowb; iscolb = *colb;
5030     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5031     bseq[0] = *B_seq;
5032   }
5033   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5034   *B_seq = bseq[0];
5035   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5036   if (!rowb) {
5037     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5038   } else {
5039     *rowb = isrowb;
5040   }
5041   if (!colb) {
5042     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5043   } else {
5044     *colb = iscolb;
5045   }
5046   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5047   PetscFunctionReturn(0);
5048 }
5049 
5050 /*
5051     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5052     of the OFF-DIAGONAL portion of local A
5053 
5054     Collective on Mat
5055 
5056    Input Parameters:
5057 +    A,B - the matrices in mpiaij format
5058 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5059 
5060    Output Parameter:
5061 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5062 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5063 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5064 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5065 
5066     Level: developer
5067 
5068 */
5069 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5070 {
5071   VecScatter_MPI_General *gen_to,*gen_from;
5072   PetscErrorCode         ierr;
5073   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5074   Mat_SeqAIJ             *b_oth;
5075   VecScatter             ctx =a->Mvctx;
5076   MPI_Comm               comm;
5077   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5078   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5079   PetscInt               *rvalues,*svalues;
5080   MatScalar              *b_otha,*bufa,*bufA;
5081   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5082   MPI_Request            *rwaits = NULL,*swaits = NULL;
5083   MPI_Status             *sstatus,rstatus;
5084   PetscMPIInt            jj,size;
5085   PetscInt               *cols,sbs,rbs;
5086   PetscScalar            *vals;
5087 
5088   PetscFunctionBegin;
5089   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5090   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5091 
5092   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5093     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5094   }
5095   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5096   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5097 
5098   if (size == 1) {
5099     startsj_s = NULL;
5100     bufa_ptr  = NULL;
5101     *B_oth    = NULL;
5102     PetscFunctionReturn(0);
5103   }
5104 
5105   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5106   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5107   nrecvs   = gen_from->n;
5108   nsends   = gen_to->n;
5109 
5110   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5111   srow    = gen_to->indices;    /* local row index to be sent */
5112   sstarts = gen_to->starts;
5113   sprocs  = gen_to->procs;
5114   sstatus = gen_to->sstatus;
5115   sbs     = gen_to->bs;
5116   rstarts = gen_from->starts;
5117   rprocs  = gen_from->procs;
5118   rbs     = gen_from->bs;
5119 
5120   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5121   if (scall == MAT_INITIAL_MATRIX) {
5122     /* i-array */
5123     /*---------*/
5124     /*  post receives */
5125     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5126     for (i=0; i<nrecvs; i++) {
5127       rowlen = rvalues + rstarts[i]*rbs;
5128       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5129       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5130     }
5131 
5132     /* pack the outgoing message */
5133     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5134 
5135     sstartsj[0] = 0;
5136     rstartsj[0] = 0;
5137     len         = 0; /* total length of j or a array to be sent */
5138     k           = 0;
5139     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5140     for (i=0; i<nsends; i++) {
5141       rowlen = svalues + sstarts[i]*sbs;
5142       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5143       for (j=0; j<nrows; j++) {
5144         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5145         for (l=0; l<sbs; l++) {
5146           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5147 
5148           rowlen[j*sbs+l] = ncols;
5149 
5150           len += ncols;
5151           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5152         }
5153         k++;
5154       }
5155       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5156 
5157       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5158     }
5159     /* recvs and sends of i-array are completed */
5160     i = nrecvs;
5161     while (i--) {
5162       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5163     }
5164     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5165     ierr = PetscFree(svalues);CHKERRQ(ierr);
5166 
5167     /* allocate buffers for sending j and a arrays */
5168     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5169     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5170 
5171     /* create i-array of B_oth */
5172     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5173 
5174     b_othi[0] = 0;
5175     len       = 0; /* total length of j or a array to be received */
5176     k         = 0;
5177     for (i=0; i<nrecvs; i++) {
5178       rowlen = rvalues + rstarts[i]*rbs;
5179       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5180       for (j=0; j<nrows; j++) {
5181         b_othi[k+1] = b_othi[k] + rowlen[j];
5182         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5183         k++;
5184       }
5185       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5186     }
5187     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5188 
5189     /* allocate space for j and a arrrays of B_oth */
5190     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5191     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5192 
5193     /* j-array */
5194     /*---------*/
5195     /*  post receives of j-array */
5196     for (i=0; i<nrecvs; i++) {
5197       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5198       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5199     }
5200 
5201     /* pack the outgoing message j-array */
5202     k = 0;
5203     for (i=0; i<nsends; i++) {
5204       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5205       bufJ  = bufj+sstartsj[i];
5206       for (j=0; j<nrows; j++) {
5207         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5208         for (ll=0; ll<sbs; ll++) {
5209           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5210           for (l=0; l<ncols; l++) {
5211             *bufJ++ = cols[l];
5212           }
5213           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5214         }
5215       }
5216       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5217     }
5218 
5219     /* recvs and sends of j-array are completed */
5220     i = nrecvs;
5221     while (i--) {
5222       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5223     }
5224     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5225   } else if (scall == MAT_REUSE_MATRIX) {
5226     sstartsj = *startsj_s;
5227     rstartsj = *startsj_r;
5228     bufa     = *bufa_ptr;
5229     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5230     b_otha   = b_oth->a;
5231   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5232 
5233   /* a-array */
5234   /*---------*/
5235   /*  post receives of a-array */
5236   for (i=0; i<nrecvs; i++) {
5237     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5238     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5239   }
5240 
5241   /* pack the outgoing message a-array */
5242   k = 0;
5243   for (i=0; i<nsends; i++) {
5244     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5245     bufA  = bufa+sstartsj[i];
5246     for (j=0; j<nrows; j++) {
5247       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5248       for (ll=0; ll<sbs; ll++) {
5249         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5250         for (l=0; l<ncols; l++) {
5251           *bufA++ = vals[l];
5252         }
5253         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5254       }
5255     }
5256     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5257   }
5258   /* recvs and sends of a-array are completed */
5259   i = nrecvs;
5260   while (i--) {
5261     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5262   }
5263   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5264   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5265 
5266   if (scall == MAT_INITIAL_MATRIX) {
5267     /* put together the new matrix */
5268     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5269 
5270     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5271     /* Since these are PETSc arrays, change flags to free them as necessary. */
5272     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5273     b_oth->free_a  = PETSC_TRUE;
5274     b_oth->free_ij = PETSC_TRUE;
5275     b_oth->nonew   = 0;
5276 
5277     ierr = PetscFree(bufj);CHKERRQ(ierr);
5278     if (!startsj_s || !bufa_ptr) {
5279       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5280       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5281     } else {
5282       *startsj_s = sstartsj;
5283       *startsj_r = rstartsj;
5284       *bufa_ptr  = bufa;
5285     }
5286   }
5287   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5288   PetscFunctionReturn(0);
5289 }
5290 
5291 /*@C
5292   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5293 
5294   Not Collective
5295 
5296   Input Parameters:
5297 . A - The matrix in mpiaij format
5298 
5299   Output Parameter:
5300 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5301 . colmap - A map from global column index to local index into lvec
5302 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5303 
5304   Level: developer
5305 
5306 @*/
5307 #if defined(PETSC_USE_CTABLE)
5308 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5309 #else
5310 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5311 #endif
5312 {
5313   Mat_MPIAIJ *a;
5314 
5315   PetscFunctionBegin;
5316   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5317   PetscValidPointer(lvec, 2);
5318   PetscValidPointer(colmap, 3);
5319   PetscValidPointer(multScatter, 4);
5320   a = (Mat_MPIAIJ*) A->data;
5321   if (lvec) *lvec = a->lvec;
5322   if (colmap) *colmap = a->colmap;
5323   if (multScatter) *multScatter = a->Mvctx;
5324   PetscFunctionReturn(0);
5325 }
5326 
5327 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5328 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5329 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5330 #if defined(PETSC_HAVE_ELEMENTAL)
5331 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5332 #endif
5333 #if defined(PETSC_HAVE_HYPRE)
5334 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5335 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5336 #endif
5337 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5338 
5339 /*
5340     Computes (B'*A')' since computing B*A directly is untenable
5341 
5342                n                       p                          p
5343         (              )       (              )         (                  )
5344       m (      A       )  *  n (       B      )   =   m (         C        )
5345         (              )       (              )         (                  )
5346 
5347 */
5348 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5349 {
5350   PetscErrorCode ierr;
5351   Mat            At,Bt,Ct;
5352 
5353   PetscFunctionBegin;
5354   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5355   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5356   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5357   ierr = MatDestroy(&At);CHKERRQ(ierr);
5358   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5359   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5360   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5361   PetscFunctionReturn(0);
5362 }
5363 
5364 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5365 {
5366   PetscErrorCode ierr;
5367   PetscInt       m=A->rmap->n,n=B->cmap->n;
5368   Mat            Cmat;
5369 
5370   PetscFunctionBegin;
5371   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5372   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5373   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5374   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5375   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5376   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5377   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5378   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5379 
5380   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5381 
5382   *C = Cmat;
5383   PetscFunctionReturn(0);
5384 }
5385 
5386 /* ----------------------------------------------------------------*/
5387 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5388 {
5389   PetscErrorCode ierr;
5390 
5391   PetscFunctionBegin;
5392   if (scall == MAT_INITIAL_MATRIX) {
5393     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5394     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5395     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5396   }
5397   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5398   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5399   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5400   PetscFunctionReturn(0);
5401 }
5402 
5403 /*MC
5404    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5405 
5406    Options Database Keys:
5407 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5408 
5409   Level: beginner
5410 
5411 .seealso: MatCreateAIJ()
5412 M*/
5413 
5414 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5415 {
5416   Mat_MPIAIJ     *b;
5417   PetscErrorCode ierr;
5418   PetscMPIInt    size;
5419 
5420   PetscFunctionBegin;
5421   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5422 
5423   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5424   B->data       = (void*)b;
5425   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5426   B->assembled  = PETSC_FALSE;
5427   B->insertmode = NOT_SET_VALUES;
5428   b->size       = size;
5429 
5430   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5431 
5432   /* build cache for off array entries formed */
5433   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5434 
5435   b->donotstash  = PETSC_FALSE;
5436   b->colmap      = 0;
5437   b->garray      = 0;
5438   b->roworiented = PETSC_TRUE;
5439 
5440   /* stuff used for matrix vector multiply */
5441   b->lvec  = NULL;
5442   b->Mvctx = NULL;
5443 
5444   /* stuff for MatGetRow() */
5445   b->rowindices   = 0;
5446   b->rowvalues    = 0;
5447   b->getrowactive = PETSC_FALSE;
5448 
5449   /* flexible pointer used in CUSP/CUSPARSE classes */
5450   b->spptr = NULL;
5451 
5452   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5453   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5454   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5455   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5456   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5457   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5458   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5459   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5460   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5461   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5462 #if defined(PETSC_HAVE_ELEMENTAL)
5463   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5464 #endif
5465 #if defined(PETSC_HAVE_HYPRE)
5466   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5467 #endif
5468   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5469   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5470   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5471   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5472 #if defined(PETSC_HAVE_HYPRE)
5473   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5474 #endif
5475   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5476   PetscFunctionReturn(0);
5477 }
5478 
5479 /*@C
5480      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5481          and "off-diagonal" part of the matrix in CSR format.
5482 
5483    Collective on MPI_Comm
5484 
5485    Input Parameters:
5486 +  comm - MPI communicator
5487 .  m - number of local rows (Cannot be PETSC_DECIDE)
5488 .  n - This value should be the same as the local size used in creating the
5489        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5490        calculated if N is given) For square matrices n is almost always m.
5491 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5492 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5493 .   i - row indices for "diagonal" portion of matrix
5494 .   j - column indices
5495 .   a - matrix values
5496 .   oi - row indices for "off-diagonal" portion of matrix
5497 .   oj - column indices
5498 -   oa - matrix values
5499 
5500    Output Parameter:
5501 .   mat - the matrix
5502 
5503    Level: advanced
5504 
5505    Notes:
5506        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5507        must free the arrays once the matrix has been destroyed and not before.
5508 
5509        The i and j indices are 0 based
5510 
5511        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5512 
5513        This sets local rows and cannot be used to set off-processor values.
5514 
5515        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5516        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5517        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5518        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5519        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5520        communication if it is known that only local entries will be set.
5521 
5522 .keywords: matrix, aij, compressed row, sparse, parallel
5523 
5524 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5525           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5526 @*/
5527 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5528 {
5529   PetscErrorCode ierr;
5530   Mat_MPIAIJ     *maij;
5531 
5532   PetscFunctionBegin;
5533   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5534   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5535   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5536   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5537   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5538   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5539   maij = (Mat_MPIAIJ*) (*mat)->data;
5540 
5541   (*mat)->preallocated = PETSC_TRUE;
5542 
5543   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5544   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5545 
5546   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5547   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5548 
5549   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5550   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5551   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5553 
5554   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5555   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5556   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5557   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5558   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 /*
5563     Special version for direct calls from Fortran
5564 */
5565 #include <petsc/private/fortranimpl.h>
5566 
5567 /* Change these macros so can be used in void function */
5568 #undef CHKERRQ
5569 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5570 #undef SETERRQ2
5571 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5572 #undef SETERRQ3
5573 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5574 #undef SETERRQ
5575 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5576 
5577 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5578 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5579 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5580 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5581 #else
5582 #endif
5583 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5584 {
5585   Mat            mat  = *mmat;
5586   PetscInt       m    = *mm, n = *mn;
5587   InsertMode     addv = *maddv;
5588   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5589   PetscScalar    value;
5590   PetscErrorCode ierr;
5591 
5592   MatCheckPreallocated(mat,1);
5593   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5594 
5595 #if defined(PETSC_USE_DEBUG)
5596   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5597 #endif
5598   {
5599     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5600     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5601     PetscBool roworiented = aij->roworiented;
5602 
5603     /* Some Variables required in the macro */
5604     Mat        A                 = aij->A;
5605     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5606     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5607     MatScalar  *aa               = a->a;
5608     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5609     Mat        B                 = aij->B;
5610     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5611     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5612     MatScalar  *ba               = b->a;
5613 
5614     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5615     PetscInt  nonew = a->nonew;
5616     MatScalar *ap1,*ap2;
5617 
5618     PetscFunctionBegin;
5619     for (i=0; i<m; i++) {
5620       if (im[i] < 0) continue;
5621 #if defined(PETSC_USE_DEBUG)
5622       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5623 #endif
5624       if (im[i] >= rstart && im[i] < rend) {
5625         row      = im[i] - rstart;
5626         lastcol1 = -1;
5627         rp1      = aj + ai[row];
5628         ap1      = aa + ai[row];
5629         rmax1    = aimax[row];
5630         nrow1    = ailen[row];
5631         low1     = 0;
5632         high1    = nrow1;
5633         lastcol2 = -1;
5634         rp2      = bj + bi[row];
5635         ap2      = ba + bi[row];
5636         rmax2    = bimax[row];
5637         nrow2    = bilen[row];
5638         low2     = 0;
5639         high2    = nrow2;
5640 
5641         for (j=0; j<n; j++) {
5642           if (roworiented) value = v[i*n+j];
5643           else value = v[i+j*m];
5644           if (in[j] >= cstart && in[j] < cend) {
5645             col = in[j] - cstart;
5646             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5647             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5648           } else if (in[j] < 0) continue;
5649 #if defined(PETSC_USE_DEBUG)
5650           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5651 #endif
5652           else {
5653             if (mat->was_assembled) {
5654               if (!aij->colmap) {
5655                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5656               }
5657 #if defined(PETSC_USE_CTABLE)
5658               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5659               col--;
5660 #else
5661               col = aij->colmap[in[j]] - 1;
5662 #endif
5663               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5664               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5665                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5666                 col  =  in[j];
5667                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5668                 B     = aij->B;
5669                 b     = (Mat_SeqAIJ*)B->data;
5670                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5671                 rp2   = bj + bi[row];
5672                 ap2   = ba + bi[row];
5673                 rmax2 = bimax[row];
5674                 nrow2 = bilen[row];
5675                 low2  = 0;
5676                 high2 = nrow2;
5677                 bm    = aij->B->rmap->n;
5678                 ba    = b->a;
5679               }
5680             } else col = in[j];
5681             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5682           }
5683         }
5684       } else if (!aij->donotstash) {
5685         if (roworiented) {
5686           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5687         } else {
5688           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5689         }
5690       }
5691     }
5692   }
5693   PetscFunctionReturnVoid();
5694 }
5695 
5696