xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3b00a383992d8d0bc7592ad380349764c9c3a1ce)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
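
   A minimal usage sketch of the recommendation above (the communicator comm, the global sizes M and N,
   and the preallocation counts are placeholders to be replaced with problem-specific values):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);        /* used when comm has a single process   */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL); /* used when comm has multiple processes */
.ve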
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
22    enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
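
   For example, an existing AIJ code can be switched to this format at runtime (the executable name
   below is only a placeholder):
.vb
   mpiexec -n 4 ./app -mat_type aijcrl
.ve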
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
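/*
    A minimal calling sketch (illustrative only; dmat is a placeholder name): with gmat an assembled
    MATSEQAIJ matrix holding the full square system on process 0 and m the number of rows this process
    is to own,

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

    creates the distributed copy. A later call with MAT_REUSE_MATRIX (passing &dmat again) moves over
    only the numerical values from process 0, keeping the existing nonzero structure.
*/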
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the diagonal and off-diagonal nonzero counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the diagonal and off-diagonal nonzero counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it, it is not scalable (each processor
403 has an order N integer array, but access is fast).
404 */
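/*
  For example (illustrative values): if the off-diagonal block has three local columns with
  garray = {3, 7, 12}, then without PETSC_USE_CTABLE the routine below sets

     colmap[3] = 1, colmap[7] = 2, colmap[12] = 3,

  leaving all other entries 0, so colmap[global] - 1 recovers the local column and 0 marks a global
  column with no local entry. With PETSC_USE_CTABLE the same (global+1, local+1) pairs are stored in
  a hash table instead of a length cmap->N array.
*/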
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled; if so, we must
723      also disassemble ourselves, in order that we may reassemble. */
724   /*
725      if the nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled, thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   /* do nondiagonal part */
1048   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1049   /* send it on its way */
1050   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   /* do local part */
1052   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1053   /* receive remote parts */
1054   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*
1059   This only works correctly for square matrices where the subblock A->A is the
1060    diagonal block
1061 */
1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1063 {
1064   PetscErrorCode ierr;
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066 
1067   PetscFunctionBegin;
1068   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1069   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1070   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1075 {
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1081   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1086 {
1087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091 #if defined(PETSC_USE_LOG)
1092   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1093 #endif
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1096   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1098 #if defined(PETSC_USE_CTABLE)
1099   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1100 #else
1101   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1102 #endif
1103   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1105   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1106   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1107   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1108   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1109 
1110   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1111   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1118 #if defined(PETSC_HAVE_ELEMENTAL)
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1120 #endif
1121 #if defined(PETSC_HAVE_HYPRE)
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1124 #endif
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1129 {
1130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1131   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1132   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1133   PetscErrorCode ierr;
1134   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1135   int            fd;
1136   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1137   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1138   PetscScalar    *column_values;
1139   PetscInt       message_count,flowcontrolcount;
1140   FILE           *file;
1141 
1142   PetscFunctionBegin;
1143   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1144   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1145   nz   = A->nz + B->nz;
1146   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1147   if (!rank) {
1148     header[0] = MAT_FILE_CLASSID;
1149     header[1] = mat->rmap->N;
1150     header[2] = mat->cmap->N;
1151 
1152     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1153     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1154     /* get largest number of rows any processor has */
1155     rlen  = mat->rmap->n;
1156     range = mat->rmap->range;
1157     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1158   } else {
1159     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     rlen = mat->rmap->n;
1161   }
1162 
1163   /* load up the local row counts */
1164   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1165   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1166 
1167   /* store the row lengths to the file */
1168   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1169   if (!rank) {
1170     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     for (i=1; i<size; i++) {
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       rlen = range[i+1] - range[i];
1174       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1178   } else {
1179     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1180     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1184 
1185   /* load up the local column indices */
1186   nzmax = nz; /* this processor needs space as large as the largest processor needs */
1187   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1189   cnt   = 0;
1190   for (i=0; i<mat->rmap->n; i++) {
1191     for (j=B->i[i]; j<B->i[i+1]; j++) {
1192       if ((col = garray[B->j[j]]) > cstart) break;
1193       column_indices[cnt++] = col;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1196     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1197   }
1198   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1199 
1200   /* store the column indices to the file */
1201   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1202   if (!rank) {
1203     MPI_Status status;
1204     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1208       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1209       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1218   }
1219   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1220 
1221   /* load up the local column values */
1222   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1223   cnt  = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if (garray[B->j[j]] > cstart) break;
1227       column_values[cnt++] = B->a[j];
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1230     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column values to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1243       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_values);CHKERRQ(ierr);
1254 
1255   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1256   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include <petscdraw.h>
1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode    ierr;
1265   PetscMPIInt       rank = aij->rank,size = aij->size;
1266   PetscBool         isdraw,iascii,isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii) {
1275     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1276     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1277       MatInfo   info;
1278       PetscBool inodes;
1279 
1280       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1281       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1282       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1283       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1284       if (!inodes) {
1285         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1286                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1287       } else {
1288         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1289                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1290       }
1291       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1292       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1293       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1295       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1296       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1298       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1299       PetscFunctionReturn(0);
1300     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1301       PetscInt inodecount,inodelimit,*inodes;
1302       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1303       if (inodes) {
1304         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1305       } else {
1306         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1307       }
1308       PetscFunctionReturn(0);
1309     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1310       PetscFunctionReturn(0);
1311     }
1312   } else if (isbinary) {
1313     if (size == 1) {
1314       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1315       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1316     } else {
1317       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318     }
1319     PetscFunctionReturn(0);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1324     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1325     if (isnull) PetscFunctionReturn(0);
1326   }
1327 
1328   {
1329     /* assemble the entire matrix onto first processor. */
1330     Mat        A;
1331     Mat_SeqAIJ *Aloc;
1332     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1333     MatScalar  *a;
1334 
1335     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1336     if (!rank) {
1337       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1340     }
1341     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1342     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1343     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1344     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1345     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1346 
1347     /* copy over the A part */
1348     Aloc = (Mat_SeqAIJ*)aij->A->data;
1349     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1350     row  = mat->rmap->rstart;
1351     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1352     for (i=0; i<m; i++) {
1353       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1354       row++;
1355       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1356     }
1357     aj = Aloc->j;
1358     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1359 
1360     /* copy over the B part */
1361     Aloc = (Mat_SeqAIJ*)aij->B->data;
1362     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1363     row  = mat->rmap->rstart;
1364     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1365     ct   = cols;
1366     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1367     for (i=0; i<m; i++) {
1368       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1369       row++;
1370       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1371     }
1372     ierr = PetscFree(ct);CHKERRQ(ierr);
1373     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     /*
1376        Every process has to participate in drawing the matrix since the graphics waits are
1377        synchronized across all processors that share the PetscDraw object
1378     */
1379     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1380     if (!rank) {
1381       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1383     }
1384     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1385     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1386     ierr = MatDestroy(&A);CHKERRQ(ierr);
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1392 {
1393   PetscErrorCode ierr;
1394   PetscBool      iascii,isdraw,issocket,isbinary;
1395 
1396   PetscFunctionBegin;
1397   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1401   if (iascii || isdraw || isbinary || issocket) {
1402     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1408 {
1409   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1410   PetscErrorCode ierr;
1411   Vec            bb1 = 0;
1412   PetscBool      hasop;
1413 
1414   PetscFunctionBegin;
1415   if (flag == SOR_APPLY_UPPER) {
1416     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1417     PetscFunctionReturn(0);
1418   }
1419 
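  /* a work vector bb1 for the locally corrected right-hand side is needed unless a single sweep with a zero initial guess (and no Eisenstat trick) is requested */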
1420   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1421     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1422   }
1423 
1424   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1425     if (flag & SOR_ZERO_INITIAL_GUESS) {
1426       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1427       its--;
1428     }
1429 
1430     while (its--) {
1431       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1432       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1433 
1434       /* update rhs: bb1 = bb - B*x */
1435       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1436       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1437 
1438       /* local sweep */
1439       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1440     }
1441   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1444       its--;
1445     }
1446     while (its--) {
1447       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449 
1450       /* update rhs: bb1 = bb - B*x */
1451       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1452       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1453 
1454       /* local sweep */
1455       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1456     }
1457   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1458     if (flag & SOR_ZERO_INITIAL_GUESS) {
1459       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460       its--;
1461     }
1462     while (its--) {
1463       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465 
1466       /* update rhs: bb1 = bb - B*x */
1467       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1468       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1469 
1470       /* local sweep */
1471       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1472     }
1473   } else if (flag & SOR_EISENSTAT) {
1474     Vec xx1;
1475 
1476     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1477     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1478 
1479     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481     if (!mat->diag) {
1482       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1483       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1484     }
1485     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1486     if (hasop) {
1487       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1488     } else {
1489       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1490     }
1491     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1492 
1493     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1494 
1495     /* local sweep */
1496     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1497     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1498     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1499   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1500 
1501   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1502 
1503   matin->factorerrortype = mat->A->factorerrortype;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1508 {
1509   Mat            aA,aB,Aperm;
1510   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1511   PetscScalar    *aa,*ba;
1512   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1513   PetscSF        rowsf,sf;
1514   IS             parcolp = NULL;
1515   PetscBool      done;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1520   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1521   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1522   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1523 
1524   /* Invert row permutation to find out where my rows should go */
1525   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1526   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1527   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1528   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1529   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1531 
1532   /* Invert column permutation to find out where my columns should go */
1533   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1534   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1535   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1536   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1537   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1538   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1540 
1541   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1542   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1543   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1544 
1545   /* Find out where my gcols should go */
1546   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1547   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1548   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1549   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1550   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1551   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1552   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1553   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1554 
1555   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1556   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1557   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) {
1559     PetscInt row = rdest[i],rowner;
1560     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1561     for (j=ai[i]; j<ai[i+1]; j++) {
1562       PetscInt cowner,col = cdest[aj[j]];
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567     for (j=bi[i]; j<bi[i+1]; j++) {
1568       PetscInt cowner,col = gcdest[bj[j]];
1569       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1570       if (rowner == cowner) dnnz[i]++;
1571       else onnz[i]++;
1572     }
1573   }
1574   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1575   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1579 
1580   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1581   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1582   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1585     PetscInt j0,rowlen;
1586     rowlen = ai[i+1] - ai[i];
1587     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the size of the work arrays), so insert in batches of at most m */
1588       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1589       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1590     }
1591     rowlen = bi[i+1] - bi[i];
1592     for (j0=j=0; j<rowlen; j0=j) {
1593       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1594       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1595     }
1596   }
1597   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1598   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1600   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1601   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1602   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1603   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1604   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1605   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1606   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1607   *B = Aperm;
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1612 {
1613   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1618   if (ghosts) *ghosts = aij->garray;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   Mat            A    = mat->A,B = mat->B;
1626   PetscErrorCode ierr;
1627   PetscReal      isend[5],irecv[5];
1628 
1629   PetscFunctionBegin;
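  /* accumulate the statistics of the diagonal (A) and off-diagonal (B) blocks, then combine across ranks according to flag */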
1630   info->block_size = 1.0;
1631   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1634   isend[3] = info->memory;  isend[4] = info->mallocs;
1635 
1636   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1637 
1638   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1639   isend[3] += info->memory;  isend[4] += info->mallocs;
1640   if (flag == MAT_LOCAL) {
1641     info->nz_used      = isend[0];
1642     info->nz_allocated = isend[1];
1643     info->nz_unneeded  = isend[2];
1644     info->memory       = isend[3];
1645     info->mallocs      = isend[4];
1646   } else if (flag == MAT_GLOBAL_MAX) {
1647     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   } else if (flag == MAT_GLOBAL_SUM) {
1655     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1656 
1657     info->nz_used      = irecv[0];
1658     info->nz_allocated = irecv[1];
1659     info->nz_unneeded  = irecv[2];
1660     info->memory       = irecv[3];
1661     info->mallocs      = irecv[4];
1662   }
1663   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1664   info->fill_ratio_needed = 0;
1665   info->factor_mallocs    = 0;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1670 {
1671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   switch (op) {
1676   case MAT_NEW_NONZERO_LOCATIONS:
1677   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1678   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1679   case MAT_KEEP_NONZERO_PATTERN:
1680   case MAT_NEW_NONZERO_LOCATION_ERR:
1681   case MAT_USE_INODES:
1682   case MAT_IGNORE_ZERO_ENTRIES:
1683     MatCheckPreallocated(A,1);
1684     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1685     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_ROW_ORIENTED:
1688     MatCheckPreallocated(A,1);
1689     a->roworiented = flg;
1690 
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1693     break;
1694   case MAT_NEW_DIAGONALS:
1695     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1696     break;
1697   case MAT_IGNORE_OFF_PROC_ENTRIES:
1698     a->donotstash = flg;
1699     break;
1700   case MAT_SPD:
1701     A->spd_set = PETSC_TRUE;
1702     A->spd     = flg;
1703     if (flg) {
1704       A->symmetric                  = PETSC_TRUE;
1705       A->structurally_symmetric     = PETSC_TRUE;
1706       A->symmetric_set              = PETSC_TRUE;
1707       A->structurally_symmetric_set = PETSC_TRUE;
1708     }
1709     break;
1710   case MAT_SYMMETRIC:
1711     MatCheckPreallocated(A,1);
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715     MatCheckPreallocated(A,1);
1716     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_HERMITIAN:
1719     MatCheckPreallocated(A,1);
1720     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_SYMMETRY_ETERNAL:
1723     MatCheckPreallocated(A,1);
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   default:
1730     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1731   }
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1736 {
1737   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1738   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1739   PetscErrorCode ierr;
1740   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1741   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1742   PetscInt       *cmap,*idx_p;
1743 
1744   PetscFunctionBegin;
1745   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1746   mat->getrowactive = PETSC_TRUE;
1747 
1748   if (!mat->rowvalues && (idx || v)) {
1749     /*
1750         allocate enough space to hold information from the longest row.
1751     */
1752     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1753     PetscInt   max = 1,tmp;
1754     for (i=0; i<matin->rmap->n; i++) {
1755       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1756       if (max < tmp) max = tmp;
1757     }
1758     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1759   }
1760 
1761   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1762   lrow = row - rstart;
1763 
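  /* request from the local blocks only the column/value work arrays the caller actually asked for */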
1764   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1765   if (!v)   {pvA = 0; pvB = 0;}
1766   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1767   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1768   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1769   nztot = nzA + nzB;
1770 
1771   cmap = mat->garray;
1772   if (v  || idx) {
1773     if (nztot) {
1774       /* Merge by increasing column number, assuming A and B are each already sorted */
1775       PetscInt imark = -1;
1776       if (v) {
1777         *v = v_p = mat->rowvalues;
1778         for (i=0; i<nzB; i++) {
1779           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1780           else break;
1781         }
1782         imark = i;
1783         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1784         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1785       }
1786       if (idx) {
1787         *idx = idx_p = mat->rowindices;
1788         if (imark > -1) {
1789           for (i=0; i<imark; i++) {
1790             idx_p[i] = cmap[cworkB[i]];
1791           }
1792         } else {
1793           for (i=0; i<nzB; i++) {
1794             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1795             else break;
1796           }
1797           imark = i;
1798         }
1799         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1800         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1801       }
1802     } else {
1803       if (idx) *idx = 0;
1804       if (v)   *v   = 0;
1805     }
1806   }
1807   *nz  = nztot;
1808   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1809   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1814 {
1815   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1816 
1817   PetscFunctionBegin;
1818   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1819   aij->getrowactive = PETSC_FALSE;
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1824 {
1825   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1826   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1827   PetscErrorCode ierr;
1828   PetscInt       i,j,cstart = mat->cmap->rstart;
1829   PetscReal      sum = 0.0;
1830   MatScalar      *v;
1831 
1832   PetscFunctionBegin;
1833   if (aij->size == 1) {
1834     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1835   } else {
1836     if (type == NORM_FROBENIUS) {
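      /* sum |a_ij|^2 over the local diagonal (A) and off-diagonal (B) blocks, reduce over all ranks, then take the square root */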
1837       v = amat->a;
1838       for (i=0; i<amat->nz; i++) {
1839         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1840       }
1841       v = bmat->a;
1842       for (i=0; i<bmat->nz; i++) {
1843         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1844       }
1845       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1846       *norm = PetscSqrtReal(*norm);
1847       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1848     } else if (type == NORM_1) { /* max column norm */
1849       PetscReal *tmp,*tmp2;
1850       PetscInt  *jj,*garray = aij->garray;
1851       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1852       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1853       *norm = 0.0;
1854       v     = amat->a; jj = amat->j;
1855       for (j=0; j<amat->nz; j++) {
1856         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1857       }
1858       v = bmat->a; jj = bmat->j;
1859       for (j=0; j<bmat->nz; j++) {
1860         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1861       }
1862       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1863       for (j=0; j<mat->cmap->N; j++) {
1864         if (tmp2[j] > *norm) *norm = tmp2[j];
1865       }
1866       ierr = PetscFree(tmp);CHKERRQ(ierr);
1867       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1868       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1869     } else if (type == NORM_INFINITY) { /* max row norm */
1870       PetscReal ntemp = 0.0;
1871       for (j=0; j<aij->A->rmap->n; j++) {
1872         v   = amat->a + amat->i[j];
1873         sum = 0.0;
1874         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1875           sum += PetscAbsScalar(*v); v++;
1876         }
1877         v = bmat->a + bmat->i[j];
1878         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1879           sum += PetscAbsScalar(*v); v++;
1880         }
1881         if (sum > ntemp) ntemp = sum;
1882       }
1883       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1884       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1885     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1891 {
1892   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1893   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1894   PetscErrorCode ierr;
1895   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1896   PetscInt       cstart = A->cmap->rstart,ncol;
1897   Mat            B;
1898   MatScalar      *array;
1899 
1900   PetscFunctionBegin;
1901   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1902 
1903   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1904   ai = Aloc->i; aj = Aloc->j;
1905   bi = Bloc->i; bj = Bloc->j;
1906   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1907     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1908     PetscSFNode          *oloc;
1909     PETSC_UNUSED PetscSF sf;
1910 
1911     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1912     /* compute d_nnz for preallocation */
1913     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1914     for (i=0; i<ai[ma]; i++) {
1915       d_nnz[aj[i]]++;
1916       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1917     }
1918     /* compute local off-diagonal contributions */
1919     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1920     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1921     /* map those to global */
1922     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1923     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1924     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1925     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1926     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1927     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1928     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1929 
1930     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1931     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1932     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1933     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1934     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1935     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1936   } else {
1937     B    = *matout;
1938     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1939     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1940   }
1941 
1942   /* copy over the A part */
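  /* each local row of A is inserted as a single global column of B (ncol row indices aj, one column index row), which effects the transpose */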
1943   array = Aloc->a;
1944   row   = A->rmap->rstart;
1945   for (i=0; i<ma; i++) {
1946     ncol = ai[i+1]-ai[i];
1947     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1948     row++;
1949     array += ncol; aj += ncol;
1950   }
1951   aj = Aloc->j;
1952   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1953 
1954   /* copy over the B part */
1955   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1956   array = Bloc->a;
1957   row   = A->rmap->rstart;
1958   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1959   cols_tmp = cols;
1960   for (i=0; i<mb; i++) {
1961     ncol = bi[i+1]-bi[i];
1962     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1963     row++;
1964     array += ncol; cols_tmp += ncol;
1965   }
1966   ierr = PetscFree(cols);CHKERRQ(ierr);
1967 
1968   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1969   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1970   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1971     *matout = B;
1972   } else {
1973     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1974   }
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1979 {
1980   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1981   Mat            a    = aij->A,b = aij->B;
1982   PetscErrorCode ierr;
1983   PetscInt       s1,s2,s3;
1984 
1985   PetscFunctionBegin;
1986   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1987   if (rr) {
1988     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1989     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1990     /* Overlap communication with computation. */
1991     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1992   }
1993   if (ll) {
1994     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1995     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1996     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1997   }
1998   /* scale the diagonal block */
1999   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2000 
2001   if (rr) {
2002     /* Do a scatter end and then right scale the off-diagonal block */
2003     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2004     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2005   }
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2010 {
2011   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2020 {
2021   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2022   Mat            a,b,c,d;
2023   PetscBool      flg;
2024   PetscErrorCode ierr;
2025 
2026   PetscFunctionBegin;
2027   a = matA->A; b = matA->B;
2028   c = matB->A; d = matB->B;
2029 
2030   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2031   if (flg) {
2032     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2033   }
2034   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2039 {
2040   PetscErrorCode ierr;
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2043 
2044   PetscFunctionBegin;
2045   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2046   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2047     /* because of the column compression in the off-processor part of the matrix a->B,
2048        the number of columns in a->B and b->B may be different, hence we cannot call
2049        MatCopy() directly on the two parts. If need be, a copy more efficient than
2050        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2051        then copying the submatrices */
2052     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2053   } else {
2054     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2055     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2061 {
2062   PetscErrorCode ierr;
2063 
2064   PetscFunctionBegin;
2065   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /*
2070    Computes the number of nonzeros per row needed for preallocation when X and Y
2071    have different nonzero structure.
2072 */
2073 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2074 {
2075   PetscInt       i,j,k,nzx,nzy;
2076 
2077   PetscFunctionBegin;
2078   /* Set the number of nonzeros in the new matrix */
2079   for (i=0; i<m; i++) {
2080     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2081     nzx = xi[i+1] - xi[i];
2082     nzy = yi[i+1] - yi[i];
2083     nnz[i] = 0;
2084     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2085       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2086       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2087       nnz[i]++;
2088     }
2089     for (; k<nzy; k++) nnz[i]++;
2090   }
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2095 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2096 {
2097   PetscErrorCode ierr;
2098   PetscInt       m = Y->rmap->N;
2099   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2100   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2101 
2102   PetscFunctionBegin;
2103   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2108 {
2109   PetscErrorCode ierr;
2110   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2111   PetscBLASInt   bnz,one=1;
2112   Mat_SeqAIJ     *x,*y;
2113 
2114   PetscFunctionBegin;
2115   if (str == SAME_NONZERO_PATTERN) {
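    /* with identical nonzero patterns the value arrays of the A and B blocks are conformal, so Y += a*X reduces to BLAS axpy on the raw arrays */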
2116     PetscScalar alpha = a;
2117     x    = (Mat_SeqAIJ*)xx->A->data;
2118     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2119     y    = (Mat_SeqAIJ*)yy->A->data;
2120     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2121     x    = (Mat_SeqAIJ*)xx->B->data;
2122     y    = (Mat_SeqAIJ*)yy->B->data;
2123     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2124     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2125     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2126   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2127     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2128   } else {
2129     Mat      B;
2130     PetscInt *nnz_d,*nnz_o;
2131     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2132     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2133     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2134     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2135     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2136     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2137     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2138     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2139     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2140     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2141     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2142     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2143     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2144     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2145   }
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2150 
2151 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2152 {
2153 #if defined(PETSC_USE_COMPLEX)
2154   PetscErrorCode ierr;
2155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2156 
2157   PetscFunctionBegin;
2158   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2159   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2160 #else
2161   PetscFunctionBegin;
2162 #endif
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2173   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2178 {
2179   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2184   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2189 {
2190   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2191   PetscErrorCode ierr;
2192   PetscInt       i,*idxb = 0;
2193   PetscScalar    *va,*vb;
2194   Vec            vtmp;
2195 
2196   PetscFunctionBegin;
2197   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2198   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2199   if (idx) {
2200     for (i=0; i<A->rmap->n; i++) {
2201       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2202     }
2203   }
2204 
2205   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2206   if (idx) {
2207     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2208   }
2209   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2210   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2211 
2212   for (i=0; i<A->rmap->n; i++) {
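  /* take the off-diagonal block's candidate whenever it has larger magnitude, mapping its local column index to global numbering via garray */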
2213     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2214       va[i] = vb[i];
2215       if (idx) idx[i] = a->garray[idxb[i]];
2216     }
2217   }
2218 
2219   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2220   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2221   ierr = PetscFree(idxb);CHKERRQ(ierr);
2222   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2227 {
2228   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2229   PetscErrorCode ierr;
2230   PetscInt       i,*idxb = 0;
2231   PetscScalar    *va,*vb;
2232   Vec            vtmp;
2233 
2234   PetscFunctionBegin;
2235   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2236   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2237   if (idx) {
2238     for (i=0; i<A->rmap->n; i++) {
2239       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2240     }
2241   }
2242 
2243   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2244   if (idx) {
2245     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2246   }
2247   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2248   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2249 
2250   for (i=0; i<A->rmap->n; i++) {
2251     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2252       va[i] = vb[i];
2253       if (idx) idx[i] = a->garray[idxb[i]];
2254     }
2255   }
2256 
2257   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2258   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2259   ierr = PetscFree(idxb);CHKERRQ(ierr);
2260   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2265 {
2266   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2267   PetscInt       n      = A->rmap->n;
2268   PetscInt       cstart = A->cmap->rstart;
2269   PetscInt       *cmap  = mat->garray;
2270   PetscInt       *diagIdx, *offdiagIdx;
2271   Vec            diagV, offdiagV;
2272   PetscScalar    *a, *diagA, *offdiagA;
2273   PetscInt       r;
2274   PetscErrorCode ierr;
2275 
2276   PetscFunctionBegin;
2277   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2278   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2279   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2280   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2281   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2282   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2283   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2284   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
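  /* for each local row keep the candidate of smaller magnitude (the diagonal block wins ties) and translate its column index to global numbering */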
2285   for (r = 0; r < n; ++r) {
2286     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2287       a[r]   = diagA[r];
2288       idx[r] = cstart + diagIdx[r];
2289     } else {
2290       a[r]   = offdiagA[r];
2291       idx[r] = cmap[offdiagIdx[r]];
2292     }
2293   }
2294   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2295   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2296   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2297   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2298   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2299   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2304 {
2305   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2306   PetscInt       n      = A->rmap->n;
2307   PetscInt       cstart = A->cmap->rstart;
2308   PetscInt       *cmap  = mat->garray;
2309   PetscInt       *diagIdx, *offdiagIdx;
2310   Vec            diagV, offdiagV;
2311   PetscScalar    *a, *diagA, *offdiagA;
2312   PetscInt       r;
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2317   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2318   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2319   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2320   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2321   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2322   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2323   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2324   for (r = 0; r < n; ++r) {
2325     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2326       a[r]   = diagA[r];
2327       idx[r] = cstart + diagIdx[r];
2328     } else {
2329       a[r]   = offdiagA[r];
2330       idx[r] = cmap[offdiagIdx[r]];
2331     }
2332   }
2333   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2334   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2335   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2336   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2337   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2338   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2343 {
2344   PetscErrorCode ierr;
2345   Mat            *dummy;
2346 
2347   PetscFunctionBegin;
2348   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2349   *newmat = *dummy;
2350   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2355 {
2356   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2357   PetscErrorCode ierr;
2358 
2359   PetscFunctionBegin;
2360   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2361   A->factorerrortype = a->A->factorerrortype;
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2366 {
2367   PetscErrorCode ierr;
2368   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2369 
2370   PetscFunctionBegin;
2371   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2372   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2373   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2374   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2379 {
2380   PetscFunctionBegin;
2381   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2382   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 /*@
2387    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2388 
2389    Collective on Mat
2390 
2391    Input Parameters:
2392 +    A - the matrix
2393 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2394 
2395    Level: advanced
2396 
2397 @*/
2398 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2399 {
2400   PetscErrorCode       ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
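
/*
   Example usage (a minimal sketch, not called anywhere in this file): assuming A is an
   assembled MATMPIAIJ matrix, the scalable overlap algorithm can be selected either with

       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or from the options database via -mat_increase_overlap_scalable (see
   MatSetFromOptions_MPIAIJ() below).
*/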
2406 
2407 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2408 {
2409   PetscErrorCode       ierr;
2410   PetscBool            sc = PETSC_FALSE,flg;
2411 
2412   PetscFunctionBegin;
2413   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2414   ierr = PetscObjectOptionsBegin((PetscObject)A);
2415     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2416     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2417     if (flg) {
2418       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2419     }
2420   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2425 {
2426   PetscErrorCode ierr;
2427   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2428   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2429 
2430   PetscFunctionBegin;
2431   if (!Y->preallocated) {
2432     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2433   } else if (!aij->nz) {
2434     PetscInt nonew = aij->nonew;
2435     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2436     aij->nonew = nonew;
2437   }
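  /* the branches above give an unpreallocated or empty diagonal block room for its diagonal entries before the generic shift is applied */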
2438   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2443 {
2444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2445   PetscErrorCode ierr;
2446 
2447   PetscFunctionBegin;
2448   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2449   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2450   if (d) {
2451     PetscInt rstart;
2452     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2453     *d += rstart;
2454 
2455   }
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 
2460 /* -------------------------------------------------------------------*/
2461 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2462                                        MatGetRow_MPIAIJ,
2463                                        MatRestoreRow_MPIAIJ,
2464                                        MatMult_MPIAIJ,
2465                                 /* 4*/ MatMultAdd_MPIAIJ,
2466                                        MatMultTranspose_MPIAIJ,
2467                                        MatMultTransposeAdd_MPIAIJ,
2468                                        0,
2469                                        0,
2470                                        0,
2471                                 /*10*/ 0,
2472                                        0,
2473                                        0,
2474                                        MatSOR_MPIAIJ,
2475                                        MatTranspose_MPIAIJ,
2476                                 /*15*/ MatGetInfo_MPIAIJ,
2477                                        MatEqual_MPIAIJ,
2478                                        MatGetDiagonal_MPIAIJ,
2479                                        MatDiagonalScale_MPIAIJ,
2480                                        MatNorm_MPIAIJ,
2481                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2482                                        MatAssemblyEnd_MPIAIJ,
2483                                        MatSetOption_MPIAIJ,
2484                                        MatZeroEntries_MPIAIJ,
2485                                 /*24*/ MatZeroRows_MPIAIJ,
2486                                        0,
2487                                        0,
2488                                        0,
2489                                        0,
2490                                 /*29*/ MatSetUp_MPIAIJ,
2491                                        0,
2492                                        0,
2493                                        MatGetDiagonalBlock_MPIAIJ,
2494                                        0,
2495                                 /*34*/ MatDuplicate_MPIAIJ,
2496                                        0,
2497                                        0,
2498                                        0,
2499                                        0,
2500                                 /*39*/ MatAXPY_MPIAIJ,
2501                                        MatCreateSubMatrices_MPIAIJ,
2502                                        MatIncreaseOverlap_MPIAIJ,
2503                                        MatGetValues_MPIAIJ,
2504                                        MatCopy_MPIAIJ,
2505                                 /*44*/ MatGetRowMax_MPIAIJ,
2506                                        MatScale_MPIAIJ,
2507                                        MatShift_MPIAIJ,
2508                                        MatDiagonalSet_MPIAIJ,
2509                                        MatZeroRowsColumns_MPIAIJ,
2510                                 /*49*/ MatSetRandom_MPIAIJ,
2511                                        0,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2516                                        0,
2517                                        MatSetUnfactored_MPIAIJ,
2518                                        MatPermute_MPIAIJ,
2519                                        0,
2520                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2521                                        MatDestroy_MPIAIJ,
2522                                        MatView_MPIAIJ,
2523                                        0,
2524                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2525                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2526                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2531                                        MatGetRowMinAbs_MPIAIJ,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                        0,
2536                                 /*75*/ MatFDColoringApply_AIJ,
2537                                        MatSetFromOptions_MPIAIJ,
2538                                        0,
2539                                        0,
2540                                        MatFindZeroDiagonals_MPIAIJ,
2541                                 /*80*/ 0,
2542                                        0,
2543                                        0,
2544                                 /*83*/ MatLoad_MPIAIJ,
2545                                        0,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2551                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2552                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2553                                        MatPtAP_MPIAIJ_MPIAIJ,
2554                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2555                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                 /*99*/ 0,
2561                                        0,
2562                                        0,
2563                                        MatConjugate_MPIAIJ,
2564                                        0,
2565                                 /*104*/MatSetValuesRow_MPIAIJ,
2566                                        MatRealPart_MPIAIJ,
2567                                        MatImaginaryPart_MPIAIJ,
2568                                        0,
2569                                        0,
2570                                 /*109*/0,
2571                                        0,
2572                                        MatGetRowMin_MPIAIJ,
2573                                        0,
2574                                        MatMissingDiagonal_MPIAIJ,
2575                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2576                                        0,
2577                                        MatGetGhosts_MPIAIJ,
2578                                        0,
2579                                        0,
2580                                 /*119*/0,
2581                                        0,
2582                                        0,
2583                                        0,
2584                                        MatGetMultiProcBlock_MPIAIJ,
2585                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2586                                        MatGetColumnNorms_MPIAIJ,
2587                                        MatInvertBlockDiagonal_MPIAIJ,
2588                                        0,
2589                                        MatCreateSubMatricesMPI_MPIAIJ,
2590                                 /*129*/0,
2591                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2592                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2593                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2594                                        0,
2595                                 /*134*/0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                 /*139*/MatSetBlockSizes_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        MatFDColoringSetUp_MPIXAIJ,
2604                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2605                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2606 };
2607 
2608 /* ----------------------------------------------------------------------------------------*/
2609 
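/* MatStoreValues()/MatRetrieveValues() support: stash a copy of the numerical values of the local
   diagonal (A) and off-diagonal (B) blocks so they can be restored later with MatRetrieveValues() */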
2610 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2611 {
2612   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2613   PetscErrorCode ierr;
2614 
2615   PetscFunctionBegin;
2616   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2617   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2622 {
2623   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2624   PetscErrorCode ierr;
2625 
2626   PetscFunctionBegin;
2627   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2628   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2629   PetscFunctionReturn(0);
2630 }
2631 
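/* (Re)preallocation: discard any existing column map, ghost array, local vector, scatter context and
   off-diagonal block B, (re)create the sequential A and B blocks, and preallocate them with the given
   diagonal/off-diagonal row lengths */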
2632 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2633 {
2634   Mat_MPIAIJ     *b;
2635   PetscErrorCode ierr;
2636 
2637   PetscFunctionBegin;
2638   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2639   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2640   b = (Mat_MPIAIJ*)B->data;
2641 
2642 #if defined(PETSC_USE_CTABLE)
2643   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2644 #else
2645   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2646 #endif
2647   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2648   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2649   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2650 
2651   /* Because B may have been resized we simply destroy it and create a new one each time */
2652   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2653   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2654   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2655   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2656   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2657   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2658 
2659   if (!B->preallocated) {
2660     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2661     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2662     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2663     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2664     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2665   }
2666 
2667   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2668   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2669   B->preallocated  = PETSC_TRUE;
2670   B->was_assembled = PETSC_FALSE;
2671   B->assembled     = PETSC_FALSE;
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2676 {
2677   Mat            mat;
2678   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2679   PetscErrorCode ierr;
2680 
2681   PetscFunctionBegin;
2682   *newmat = 0;
2683   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2684   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2685   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2686   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2687   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2688   a       = (Mat_MPIAIJ*)mat->data;
2689 
2690   mat->factortype   = matin->factortype;
2691   mat->assembled    = PETSC_TRUE;
2692   mat->insertmode   = NOT_SET_VALUES;
2693   mat->preallocated = PETSC_TRUE;
2694 
2695   a->size         = oldmat->size;
2696   a->rank         = oldmat->rank;
2697   a->donotstash   = oldmat->donotstash;
2698   a->roworiented  = oldmat->roworiented;
2699   a->rowindices   = 0;
2700   a->rowvalues    = 0;
2701   a->getrowactive = PETSC_FALSE;
2702 
2703   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2704   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2705 
2706   if (oldmat->colmap) {
2707 #if defined(PETSC_USE_CTABLE)
2708     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2709 #else
2710     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2711     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2712     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2713 #endif
2714   } else a->colmap = 0;
2715   if (oldmat->garray) {
2716     PetscInt len;
2717     len  = oldmat->B->cmap->n;
2718     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2719     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2720     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2721   } else a->garray = 0;
2722 
2723   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2724   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2725   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2726   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2727   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2728   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2729   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2730   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2731   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2732   *newmat = mat;
2733   PetscFunctionReturn(0);
2734 }
2735 
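/* Binary load: rank 0 reads the header, row lengths, column indices and values from the viewer and
   ships each remaining process its portion with MPIULong_Send()/MPIULong_Recv(); every process then
   preallocates and inserts its local rows before the final assembly */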
2736 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2737 {
2738   PetscScalar    *vals,*svals;
2739   MPI_Comm       comm;
2740   PetscErrorCode ierr;
2741   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2742   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2743   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2744   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2745   PetscInt       cend,cstart,n,*rowners;
2746   int            fd;
2747   PetscInt       bs = newMat->rmap->bs;
2748 
2749   PetscFunctionBegin;
2750   /* force binary viewer to load .info file if it has not yet done so */
2751   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2752   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2753   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2754   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2755   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2756   if (!rank) {
2757     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2758     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2758     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2759     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2760   }
2761 
2762   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2763   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2764   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2765   if (bs < 0) bs = 1;
2766 
2767   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2768   M    = header[1]; N = header[2];
2769 
2770   /* If global sizes are set, check if they are consistent with that given in the file */
2771   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2772   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2773 
2774   /* determine ownership of all (block) rows */
2775   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2776   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2777   else m = newMat->rmap->n; /* Set by user */
2778 
2779   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2780   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2781 
2782   /* First process needs enough room for process with most rows */
2783   if (!rank) {
2784     mmax = rowners[1];
2785     for (i=2; i<=size; i++) {
2786       mmax = PetscMax(mmax, rowners[i]);
2787     }
2788   } else mmax = -1;             /* unused, but compilers complain */
2789 
2790   rowners[0] = 0;
2791   for (i=2; i<=size; i++) {
2792     rowners[i] += rowners[i-1];
2793   }
2794   rstart = rowners[rank];
2795   rend   = rowners[rank+1];
2796 
2797   /* distribute row lengths to all processors */
2798   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2801     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2802     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2803     for (j=0; j<m; j++) {
2804       procsnz[0] += ourlens[j];
2805     }
2806     for (i=1; i<size; i++) {
2807       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2808       /* calculate the number of nonzeros on each processor */
2809       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2810         procsnz[i] += rowlengths[j];
2811       }
2812       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2813     }
2814     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2815   } else {
2816     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2817   }
2818 
2819   if (!rank) {
2820     /* determine max buffer needed and allocate it */
2821     maxnz = 0;
2822     for (i=0; i<size; i++) {
2823       maxnz = PetscMax(maxnz,procsnz[i]);
2824     }
2825     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2826 
2827     /* read in my part of the matrix column indices  */
2828     nz   = procsnz[0];
2829     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2830     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2831 
2832     /* read in everyone else's part and ship it off */
2833     for (i=1; i<size; i++) {
2834       nz   = procsnz[i];
2835       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2836       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2837     }
2838     ierr = PetscFree(cols);CHKERRQ(ierr);
2839   } else {
2840     /* determine buffer space needed for message */
2841     nz = 0;
2842     for (i=0; i<m; i++) {
2843       nz += ourlens[i];
2844     }
2845     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2846 
2847     /* receive message of column indices */
2848     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2849   }
2850 
2851   /* determine column ownership if matrix is not square */
2852   if (N != M) {
2853     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2854     else n = newMat->cmap->n;
2855     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2856     cstart = cend - n;
2857   } else {
2858     cstart = rstart;
2859     cend   = rend;
2860     n      = cend - cstart;
2861   }
2862 
2863   /* loop over local rows, determining number of off diagonal entries */
2864   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2865   jj   = 0;
2866   for (i=0; i<m; i++) {
2867     for (j=0; j<ourlens[i]; j++) {
2868       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2869       jj++;
2870     }
2871   }
2872 
2873   for (i=0; i<m; i++) {
2874     ourlens[i] -= offlens[i];
2875   }
2876   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2877 
2878   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2879 
2880   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2881 
2882   for (i=0; i<m; i++) {
2883     ourlens[i] += offlens[i];
2884   }
2885 
2886   if (!rank) {
2887     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2888 
2889     /* read in my part of the matrix numerical values  */
2890     nz   = procsnz[0];
2891     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2892 
2893     /* insert into matrix */
2894     jj      = rstart;
2895     smycols = mycols;
2896     svals   = vals;
2897     for (i=0; i<m; i++) {
2898       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2899       smycols += ourlens[i];
2900       svals   += ourlens[i];
2901       jj++;
2902     }
2903 
2904     /* read in the other processes' parts and ship them out */
2905     for (i=1; i<size; i++) {
2906       nz   = procsnz[i];
2907       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2908       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2909     }
2910     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2911   } else {
2912     /* receive numeric values */
2913     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2914 
2915     /* receive message of values */
2916     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2917 
2918     /* insert into matrix */
2919     jj      = rstart;
2920     smycols = mycols;
2921     svals   = vals;
2922     for (i=0; i<m; i++) {
2923       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2924       smycols += ourlens[i];
2925       svals   += ourlens[i];
2926       jj++;
2927     }
2928   }
2929   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2930   ierr = PetscFree(vals);CHKERRQ(ierr);
2931   ierr = PetscFree(mycols);CHKERRQ(ierr);
2932   ierr = PetscFree(rowners);CHKERRQ(ierr);
2933   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2934   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 /* Not scalable because of ISAllGather() unless getting all columns. */
2939 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2940 {
2941   PetscErrorCode ierr;
2942   IS             iscol_local;
2943   PetscBool      isstride;
2944   PetscMPIInt    lisstride=0,gisstride;
2945 
2946   PetscFunctionBegin;
2947   /* check if we are grabbing all columns */
2948   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2949 
2950   if (isstride) {
2951     PetscInt  start,len,mstart,mlen;
2952     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2953     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2954     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2955     if (mstart == start && mlen-mstart == len) lisstride = 1;
2956   }
2957 
2958   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2959   if (gisstride) {
2960     PetscInt N;
2961     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2962     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2963     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2964     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2965   } else {
2966     PetscInt cbs;
2967     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2968     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2969     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2970   }
2971 
2972   *isseq = iscol_local;
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 /*
2977  Used to avoid ISAllGather() and the global size of iscol_local (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2978 
2979  Input Parameters:
2980    mat - matrix
2981    isrow, iscol - parallel row and column index sets; their local indices are subsets of the local rows and
2982            columns of mat, i.e., mat->cstart <= iscol[i] < mat->cend
2983  Output Parameters:
2984    isrow_d, iscol_d - sequential index sets of the selected local rows and local ("diagonal" block) columns
2985    iscol_o - sequential index set of the selected off-process ("off-diagonal" block) columns of mat
2986    isgarray - for each entry of iscol_o, its column index in the resulting submatrix */
2987 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,IS *isgarray)
2988 {
2989   PetscErrorCode ierr;
2990   Vec            x,cmap;
2991   const PetscInt *is_idx;
2992   PetscScalar    *xarray,*cmaparray;
2993   PetscInt       ncols,isstart,*idx,*camp,count,m,rstart;
2994   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
2995   Mat            B=a->B;
2996   Vec            lvec=a->lvec,lcmap;
2997   PetscInt       i,cstart,cend,Bn=B->cmap->N;
2998   MPI_Comm       comm;
2999 
3000   PetscFunctionBegin;
3001   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3002   PetscMPIInt rank;
3003   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3004   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3005 
3006   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3007   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3008   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3009   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3010 
3011   /* get start indices */
3012   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3013   isstart -= ncols;
3014   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3015 
3016   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3017   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3018   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3019   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3020   for (i=0; i<ncols; i++) {
3021     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3022     cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */
3023 
3024     idx[i]                      = is_idx[i]-cstart;
3025   }
3026   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3027   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3028   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3029 
3030   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_COPY_VALUES,iscol_d);CHKERRQ(ierr);
3031   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3032   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3033   ierr = PetscFree(idx);CHKERRQ(ierr);
3034 
3035   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3036   rstart = mat->rmap->rstart;
3037   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3038   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3039   for (i=0; i<m; i++) {
3040     idx[i] = is_idx[i]-rstart;
3041   }
3042   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3043 
3044   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_COPY_VALUES,isrow_d);CHKERRQ(ierr);
3045   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3046   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3047   ierr = PetscFree(idx);CHKERRQ(ierr);
3048 
3049   /* (2) scatter x and cmap using a->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3050   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3051 
3052   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3053 
3054   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3055   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3056 
3057   count = ncols + Bn;
3058   ierr  = PetscMalloc2(count,&idx,count,&camp);CHKERRQ(ierr);
3059 
3060   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3061 
3062   /* (3) create sequential iscol_o (selected off-process columns of B, in local numbering) and isgarray (their column indices in the submatrix) */
3063   /* local column indices */
3064   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3065   for (i=0; i<ncols; i++) {
3066     idx[i]  = is_idx[i];
3067     camp[i] = i + isstart; /* global index of iscol = column index in submat */
3068   }
3069   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3070   count = ncols;
3071 
3072   /* off-process column indices */
3073   PetscInt *idx1,*cmap1,count1=0;
3074   ierr = PetscMalloc2(Bn,&idx1,Bn,&cmap1);CHKERRQ(ierr);
3075 
3076   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3077   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3078   for (i=0; i<Bn; i++) {
3079     if (PetscRealPart(xarray[i]) > -1.0) {
3080       idx1[count1]    = i; /* local column index in off-diagonal part B */
3081       cmap1[count1++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3082 
3083       idx[count]    = (PetscInt)PetscRealPart(xarray[i]);
3084       camp[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3085     }
3086   }
3087   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3088   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3089 
3090   ierr = PetscSortIntWithArray(count,camp,idx);CHKERRQ(ierr);
3091 
3092   ierr = ISCreateGeneral(PETSC_COMM_SELF,count1,idx1,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3093   ierr = ISCreateGeneral(PETSC_COMM_SELF,count1,cmap1,PETSC_COPY_VALUES,isgarray);CHKERRQ(ierr);
3094   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3095   ierr = ISSetBlockSize(*iscol_o,i);CHKERRQ(ierr);
3096   ierr = PetscFree2(idx1,cmap1);CHKERRQ(ierr);
3097 
3098   ierr = PetscFree2(idx,camp);CHKERRQ(ierr);
3099   ierr = VecDestroy(&x);CHKERRQ(ierr);
3100   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3101   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 /* Both isrow and iscol have the same processor distribution as mat; *submat is assembled from submatrices of the local diagonal and off-diagonal blocks */
3106 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3107 {
3108   PetscErrorCode ierr;
3109   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3110   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3111   Mat            M=NULL,B=a->B;
3112   PetscInt       *garray = a->garray,*colsub,Ncols;
3113   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3114   IS             iscol_sub,iscmap;
3115   const PetscInt *is_idx,*cmap;
3116   PetscBool      allcolumns=PETSC_FALSE;
3117   IS             iscol_local=NULL;
3118   MPI_Comm       comm;
3119   IS             iscol_d,isrow_d,iscol_o,isgarray;
3120   Mat            Asub=NULL,Bsub=NULL;
3121 
3122   PetscFunctionBegin;
3123   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3124   PetscMPIInt rank;
3125   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3126 
3127   if (call == MAT_REUSE_MATRIX) {
3128     Mat_MPIAIJ *matsub=(Mat_MPIAIJ*)(*submat)->data;
3129 
3130     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3131     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3132 
3133     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3134     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3135 
3136     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3137     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3138 
3139     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,call,&matsub->A);CHKERRQ(ierr);
3140     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,call,&matsub->B);CHKERRQ(ierr);
3141     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143 
3144   } else { /* call == MAT_INITIAL_MATRIX) */
3145     const PetscInt *garray1;
3146 
3147     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&isgarray);CHKERRQ(ierr);
3148 
3149     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,call,&Asub);CHKERRQ(ierr);
3150     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,call,&Bsub);CHKERRQ(ierr);
3151 
3152     PetscInt BsubN = Bsub->cmap->N;
3153 #if 0
3154     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3155     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3156     ierr = MatSetBlockSizes(Asub,bs,cbs);CHKERRQ(ierr);
3157     printf("[%d] bs %d cbs %d Asub rbs %d, cbs %d\n",rank,bs,cbs,Asub->rmap->bs,Asub->cmap->bs);
3158 #endif
3159 
3160     if (rank == -1) {
3161       printf("[%d] Asub:\n",rank);
3162       ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
3163       ierr = MatView(Asub,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3164       ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3165       if (!rank) printf("--------------------\n");
3166 
3167       Mat_SeqAIJ *b = (Mat_SeqAIJ*)Bsub->data;
3168       printf("[%d] Bsub: allocated nz %d\n",rank,b->maxnz);
3169       ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
3170       ierr = MatView(Bsub,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3171       ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3172       if (!rank) printf("--------------------\n");
3173     }
3174 
3175     ierr = ISGetIndices(isgarray,&garray1);CHKERRQ(ierr);
3176     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray1,&M);CHKERRQ(ierr);
3177     ierr = ISRestoreIndices(isgarray,&garray1);CHKERRQ(ierr);
3178 
3179     if (rank == -1) {
3180       printf("matrix M:\n");
3181     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
3182     ierr = MatView(M,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3183     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3184     if (!rank) printf("--------------------\n");
3185     ierr = MPI_Barrier(comm);CHKERRQ(ierr);
3186     }
3187 
3188     /* Check whether Bsub == M->B; if not, compress iscol_o accordingly */
3189 
3190     a = (Mat_MPIAIJ*)M->data;
3191 #if 1
3192     if (BsubN != a->B->cmap->N) {
3193       printf("[%d] Bsub->cmap->N %d != a->B->cmap->N %d\n",rank,BsubN,a->B->cmap->N);
3194     }
3195 #endif
3196 
3197     ierr = ISDestroy(&isgarray);CHKERRQ(ierr);
3198 
3199     /* save isrow_d, iscol_d and iscol_o used in processor for next request */
3200     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3201     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3202 
3203     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3204     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3205 
3206     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3207     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3208 
3209     *submat = M;
3210   }
3211   PetscFunctionReturn(0);
3212 }
3213 
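/* Dispatch: when isrow (and possibly iscol) have the same parallel layout as mat, use the scalable
   SameRowColDist/SameRowDist paths; otherwise gather iscol onto every process via ISGetSeqIS_Private()
   and fall back to the nonscalable implementation */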
3214 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3215 {
3216   PetscErrorCode ierr;
3217   IS             iscol_local,isrow_d;
3218   PetscInt       csize;
3219   PetscInt       n,i,j,start,end;
3220   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3221   MPI_Comm       comm;
3222 
3223   PetscFunctionBegin;
3224   /* If isrow has same processor distribution as mat,
3225      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3226   if (call == MAT_REUSE_MATRIX) {
3227     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3228     if (isrow_d) {
3229       sameRowDist  = PETSC_TRUE;
3230       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3231     } else {
3232       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3233       if (iscol_local) {
3234         sameRowDist  = PETSC_TRUE;
3235         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3236       }
3237     }
3238   } else {
3239     /* check if isrow has same processor distribution as mat */
3240     sameDist[0] = PETSC_FALSE;
3241     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3242     if (!n) {
3243       sameDist[0] = PETSC_TRUE;
3244     } else {
3245       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3246       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3247       if (i >= start && j < end) {
3248         sameDist[0] = PETSC_TRUE;
3249       }
3250     }
3251 
3252     /* check if iscol has same processor distribution as mat */
3253     sameDist[1] = PETSC_FALSE;
3254     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3255     if (!n) {
3256       sameDist[1] = PETSC_TRUE;
3257     } else {
3258       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3259       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3260       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3261     }
3262 
3263     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3264     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3265     sameRowDist = tsameDist[0];
3266   }
3267 
3268   if (sameRowDist) {
3269     if (tsameDist[1]) {
3270       /* isrow and iscol have same processor distribution as mat */
3271       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3272     } else {
3273       /* isrow has same processor distribution as mat */
3274       ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3275     }
3276     PetscFunctionReturn(0);
3277   }
3278 
3279   /* General case: iscol -> iscol_local which has global size of iscol */
3280   if (call == MAT_REUSE_MATRIX) {
3281     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3282     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3283   } else {
3284     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3285   }
3286 
3287   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3288   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3289 
3290   if (call == MAT_INITIAL_MATRIX) {
3291     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3292     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3293   }
3294   PetscFunctionReturn(0);
3295 }
3296 
3297 /*@C
3298      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3299          and "off-diagonal" part of the matrix in CSR format.
3300 
3301    Collective on MPI_Comm
3302 
3303    Input Parameters:
3304 +  comm - MPI communicator
3305 .  A - "diagonal" portion of matrix
3306 .  B - "off-diagonal" portion of matrix, destroyed by this routine
3307 -  garray - global index of B columns
3308 
3309    Output Parameter:
3310 .   mat - the matrix, with input A as its local diagonal matrix
3311    Level: advanced
3312 
3313    Notes:
3314        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3315        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3316 
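   Example Usage:
       A minimal sketch only: the names are illustrative, A, B and garray are assumed to already
       describe consistent "diagonal" and "off-diagonal" blocks, and error checking is omitted.

$       Mat C;
$       MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,&C);
$       /* A and B now belong to C; they must not be used or destroyed separately afterwards */
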
3317 .seealso: MatCreateMPIAIJWithSplitArrays()
3318 @*/
3319 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3320 {
3321   PetscErrorCode ierr;
3322   Mat_MPIAIJ     *maij;
3323   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data;
3324   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3325   PetscScalar    *oa=b->a;
3326   Mat            Btmp;
3327   PetscInt       m,n,N;
3328 
3329   PetscFunctionBegin;
3330   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3331   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3332   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3333 
3334   /* get global columns of mat */
3335   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3336 
3337   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3338   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3339   maij = (Mat_MPIAIJ*)(*mat)->data;
3340 
3341   (*mat)->preallocated = PETSC_TRUE;
3342 
3343   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3344   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3345 
3346   maij->A = A;
3347 
3348   nz = oi[m];
3349   for (i=0; i<nz; i++) {
3350     col   = oj[i];
3351     oj[i] = garray[col];
3352   }
3353 
3354   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Btmp);CHKERRQ(ierr);
3355   Mat_SeqAIJ *btmp = (Mat_SeqAIJ*)Btmp->data;
3356   btmp->maxnz     = b->maxnz; /* allocated nonzeros of B */
3357   maij->B         = Btmp;
3358 
3359   PetscMPIInt rank;
3360   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3361   if (rank == -1) {
3362       printf("[%d] Btmp:\n",rank);
3363       ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
3364       ierr = MatView(Btmp,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3365       ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3366       if (!rank) printf("--------------------\n");
3367   }
3368 
3369   /* TODO: check whether B == Btmp; see src/ksp/ksp/examples/tests/runex21_2,3 */
3370 
3371   if (B->rmap->N != Btmp->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BtmpN %D",B->rmap->N,Btmp->rmap->N);
3372 
3373   b->singlemalloc = PETSC_FALSE; /* these arrays are shared by Btmp */
3374   b->free_a       = PETSC_FALSE;
3375   b->free_ij      = PETSC_FALSE;
3376   ierr = MatDestroy(&B);CHKERRQ(ierr);
3377 
3378   b = (Mat_SeqAIJ*)Btmp->data;
3379   b->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Btmp) */
3380   b->free_a       = PETSC_TRUE;
3381   b->free_ij      = PETSC_TRUE;
3382 
3383   /* condense columns of maij->B */
3384   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3385   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3386   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3388   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3389   PetscFunctionReturn(0);
3390 }
3391 
3392 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3393 
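/* isrow has the same parallel layout as mat: gather iscol, compress it against the local and off-diagonal
   columns actually present, build a sequential submatrix Msub for the local rows, then insert Msub row by
   row into a parallel matrix with the requested column layout */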
3394 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3395 {
3396   PetscErrorCode ierr;
3397   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3398   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3399   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3400   Mat            M,Msub,B=a->B,Mnew=NULL;
3401   MatScalar      *aa;
3402   Mat_SeqAIJ     *aij;
3403   PetscInt       *garray = a->garray,*colsub,Ncols;
3404   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3405   IS             iscol_sub,iscmap;
3406   const PetscInt *is_idx,*cmap;
3407   PetscBool      allcolumns=PETSC_FALSE;
3408   IS             iscol_local=NULL;
3409   MPI_Comm       comm;
3410   IS             iscol_d,isrow_d,iscol_o,isgarray;
3411   Mat            Asub=NULL,Bsub=NULL;
3412 
3413   PetscFunctionBegin;
3414   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3415   PetscMPIInt rank;
3416   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3417 
3418   if (call == MAT_REUSE_MATRIX) {
3419     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3420     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3421     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3422 
3423     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3424     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3425 
3426     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3427     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3428 
3429     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3430 
3431   } else { /* call == MAT_INITIAL_MATRIX */
3432     PetscBool flg;
3433 
3434     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3435     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3436 
3437     /* (1) iscol -> nonscalable iscol_local */
3438     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3439     ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3440     if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != Ncols %D",n,Ncols);
3441 
3442     /* Check for special case: each processor gets entire matrix columns */
3443     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3444     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3445     if (allcolumns) {
3446       iscol_sub = iscol_local;
3447       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3448       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3449 
3450     } else {
3451       /* (2) iscol_local -> iscol_sub and iscmap */
3452       PetscInt *idx,*cmap1,k,cbs;
3453 
3454       /* the implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3455       ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr);
3456       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not implemented yet");
3457 
3458       ierr = PetscMalloc2(Ncols,&idx,Ncols,&cmap1);CHKERRQ(ierr);
3459       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3460       count = 0;
3461       k     = 0;
3462       for (i=0; i<Ncols; i++) {
3463         j = is_idx[i];
3464         if (j >= cstart && j < cend) {
3465           /* diagonal part of mat */
3466           idx[count]     = j;
3467           cmap1[count++] = i; /* column index in submat */
3468         } else if (Bn) {
3469           /* off-diagonal part of mat */
3470           if (j == garray[k]) {
3471             idx[count]     = j;
3472             cmap1[count++] = i;  /* column index in submat */
3473           } else if (j > garray[k]) {
3474             while (j > garray[k] && k < Bn-1) k++;
3475             if (j == garray[k]) {
3476               idx[count]     = j;
3477               cmap1[count++] = i; /* column index in submat */
3478             }
3479           }
3480         }
3481       }
3482       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3483 
3484       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,&iscol_sub);CHKERRQ(ierr);
3485       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3486       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3487 
3488       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_COPY_VALUES,&iscmap);CHKERRQ(ierr);
3489       ierr = PetscFree2(idx,cmap1);CHKERRQ(ierr);
3490     }
3491 
3492     /* (3) Create sequential Msub */
3493     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3494   }
3495 
3496   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3497   aij  = (Mat_SeqAIJ*)(Msub)->data;
3498   ii   = aij->i;
3499   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3500 
3501   /*
3502       m - number of local rows
3503       Ncols - number of columns (same on all processors)
3504       rstart - first row in new global matrix generated
3505   */
3506   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3507 
3508   if (call == MAT_INITIAL_MATRIX) {
3509     /* (4) Create parallel newmat */
3510     PetscMPIInt    rank,size;
3511     PetscInt       csize;
3512 
3513     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3514     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3515 
3516     /*
3517         Determine the number of non-zeros in the diagonal and off-diagonal
3518         portions of the matrix in order to do correct preallocation
3519     */
3520 
3521     /* first get start and end of "diagonal" columns */
3522     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3523     if (csize == PETSC_DECIDE) {
3524       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3525       if (mglobal == Ncols) { /* square matrix */
3526         nlocal = m;
3527       } else {
3528         nlocal = Ncols/size + ((Ncols % size) > rank);
3529       }
3530     } else {
3531       nlocal = csize;
3532     }
3533     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3534     rstart = rend - nlocal;
3535     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3536 
3537     /* next, compute all the lengths */
3538     jj    = aij->j;
3539     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3540     olens = dlens + m;
3541     for (i=0; i<m; i++) {
3542       jend = ii[i+1] - ii[i];
3543       olen = 0;
3544       dlen = 0;
3545       for (j=0; j<jend; j++) {
3546         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3547         else dlen++;
3548         jj++;
3549       }
3550       olens[i] = olen;
3551       dlens[i] = dlen;
3552     }
3553     ierr = MatGetBlockSizes(Msub,&bs,&cbs);CHKERRQ(ierr);
3554 
3555     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3556     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3557     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3558     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3559     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3560     ierr = PetscFree(dlens);CHKERRQ(ierr);
3561 
3562   } else { /* call == MAT_REUSE_MATRIX */
3563     M    = *newmat;
3564     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3565     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3566     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3567     /*
3568          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3569        rather than the slower MatSetValues().
3570     */
3571     M->was_assembled = PETSC_TRUE;
3572     M->assembled     = PETSC_FALSE;
3573   }
3574 
3575   /* (5) Set values of Msub to *newmat */
3576   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3577   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3578 
3579   jj   = aij->j;
3580   aa   = aij->a;
3581   for (i=0; i<m; i++) {
3582     row = rstart + i;
3583     nz  = ii[i+1] - ii[i];
3584     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3585     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3586     jj += nz; aa += nz;
3587   }
3588   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3589 
3590   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3591   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3592 
3593   ierr = PetscFree(colsub);CHKERRQ(ierr);
3594 
3595   /* save Msub, iscol_sub and iscmap used in processor for next request */
3596   if (call ==  MAT_INITIAL_MATRIX) {
3597     *newmat = M;
3598     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3599     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3600 
3601     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3602     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3603 
3604     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3605     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3606 
3607     if (iscol_local) {
3608       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3609       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3610     }
3611   }
3612   PetscFunctionReturn(0);
3613 }
3614 
3615 /*
3616     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ on each
3617   process, and then the end result obtained by concatenating the local matrices.
3618   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3619 
3620   Note: This requires a sequential iscol with all indices.
3621 */
3622 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3623 {
3624   PetscErrorCode ierr;
3625   PetscMPIInt    rank,size;
3626   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3627   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3628   Mat            M,Mreuse;
3629   MatScalar      *aa,*vwork;
3630   MPI_Comm       comm;
3631   Mat_SeqAIJ     *aij;
3632   PetscBool      colflag,allcolumns=PETSC_FALSE;
3633 
3634   PetscFunctionBegin;
3635   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3636   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3637   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3638 
3639   /* Check for special case: each processor gets entire matrix columns */
3640   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3641   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3642   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3643 
3644   if (call ==  MAT_REUSE_MATRIX) {
3645     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3646     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3647     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3648   } else {
3649     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3650   }
3651 
3652   /*
3653       m - number of local rows
3654       n - number of columns (same on all processors)
3655       rstart - first row in new global matrix generated
3656   */
3657   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3658   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3659   if (call == MAT_INITIAL_MATRIX) {
3660     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3661     ii  = aij->i;
3662     jj  = aij->j;
3663 
3664     /*
3665         Determine the number of non-zeros in the diagonal and off-diagonal
3666         portions of the matrix in order to do correct preallocation
3667     */
3668 
3669     /* first get start and end of "diagonal" columns */
3670     if (csize == PETSC_DECIDE) {
3671       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3672       if (mglobal == n) { /* square matrix */
3673         nlocal = m;
3674       } else {
3675         nlocal = n/size + ((n % size) > rank);
3676       }
3677     } else {
3678       nlocal = csize;
3679     }
3680     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3681     rstart = rend - nlocal;
3682     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3683 
3684     /* next, compute all the lengths */
3685     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3686     olens = dlens + m;
3687     for (i=0; i<m; i++) {
3688       jend = ii[i+1] - ii[i];
3689       olen = 0;
3690       dlen = 0;
3691       for (j=0; j<jend; j++) {
3692         if (*jj < rstart || *jj >= rend) olen++;
3693         else dlen++;
3694         jj++;
3695       }
3696       olens[i] = olen;
3697       dlens[i] = dlen;
3698     }
3699     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3700     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3701     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3702     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3703     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3704     ierr = PetscFree(dlens);CHKERRQ(ierr);
3705   } else {
3706     PetscInt ml,nl;
3707 
3708     M    = *newmat;
3709     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3710     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3711     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3712     /*
3713          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3714        rather than the slower MatSetValues().
3715     */
3716     M->was_assembled = PETSC_TRUE;
3717     M->assembled     = PETSC_FALSE;
3718   }
3719   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3720   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3721   ii   = aij->i;
3722   jj   = aij->j;
3723   aa   = aij->a;
3724   for (i=0; i<m; i++) {
3725     row   = rstart + i;
3726     nz    = ii[i+1] - ii[i];
3727     cwork = jj;     jj += nz;
3728     vwork = aa;     aa += nz;
3729     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3730   }
3731 
3732   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   *newmat = M;
3735 
3736   /* save submatrix used in processor for next request */
3737   if (call ==  MAT_INITIAL_MATRIX) {
3738     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3739     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3740   }
3741   PetscFunctionReturn(0);
3742 }
3743 
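/* CSR preallocation: count the diagonal and off-diagonal nonzeros of every local row from (Ii,J),
   preallocate accordingly, then insert the rows with MatSetValues_MPIAIJ() and assemble */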
3744 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3745 {
3746   PetscInt       m,cstart, cend,j,nnz,i,d;
3747   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3748   const PetscInt *JJ;
3749   PetscScalar    *values;
3750   PetscErrorCode ierr;
3751   PetscBool      nooffprocentries;
3752 
3753   PetscFunctionBegin;
3754   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3755 
3756   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3757   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3758   m      = B->rmap->n;
3759   cstart = B->cmap->rstart;
3760   cend   = B->cmap->rend;
3761   rstart = B->rmap->rstart;
3762 
3763   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3764 
3765 #if defined(PETSC_USE_DEBUG)
3766   for (i=0; i<m; i++) {
3767     nnz = Ii[i+1]- Ii[i];
3768     JJ  = J + Ii[i];
3769     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3770     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3771     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3772   }
3773 #endif
3774 
3775   for (i=0; i<m; i++) {
3776     nnz     = Ii[i+1]- Ii[i];
3777     JJ      = J + Ii[i];
3778     nnz_max = PetscMax(nnz_max,nnz);
3779     d       = 0;
3780     for (j=0; j<nnz; j++) {
3781       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3782     }
3783     d_nnz[i] = d;
3784     o_nnz[i] = nnz - d;
3785   }
3786   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3787   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3788 
3789   if (v) values = (PetscScalar*)v;
3790   else {
3791     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3792   }
3793 
3794   for (i=0; i<m; i++) {
3795     ii   = i + rstart;
3796     nnz  = Ii[i+1]- Ii[i];
3797     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3798   }
3799   nooffprocentries    = B->nooffprocentries;
3800   B->nooffprocentries = PETSC_TRUE;
3801   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3802   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3803   B->nooffprocentries = nooffprocentries;
3804 
3805   if (!v) {
3806     ierr = PetscFree(values);CHKERRQ(ierr);
3807   }
3808   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3809   PetscFunctionReturn(0);
3810 }
3811 
3812 /*@
3813    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3814    (the default parallel PETSc format).
3815 
3816    Collective on MPI_Comm
3817 
3818    Input Parameters:
3819 +  B - the matrix
3820 .  i - the indices into j for the start of each local row (starts with zero)
3821 .  j - the column indices for each local row (starts with zero)
3822 -  v - optional values in the matrix
3823 
3824    Level: developer
3825 
3826    Notes:
3827        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3828      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3829      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3830 
3831        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3832 
3833        The format used for the sparse matrix input is equivalent to a
3834     row-major ordering, i.e. for the following matrix, the input data expected is
3835     as shown
3836 
3837 $        1 0 0
3838 $        2 0 3     P0
3839 $       -------
3840 $        4 5 6     P1
3841 $
3842 $     Process0 [P0]: rows_owned=[0,1]
3843 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3844 $        j =  {0,0,2}  [size = 3]
3845 $        v =  {1,2,3}  [size = 3]
3846 $
3847 $     Process1 [P1]: rows_owned=[2]
3848 $        i =  {0,3}    [size = nrow+1  = 1+1]
3849 $        j =  {0,1,2}  [size = 3]
3850 $        v =  {4,5,6}  [size = 3]
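
   A minimal calling sketch for Process0 above (the variable names are illustrative
   and error checking is omitted; Process1 would pass its own i, j, v arrays):

$        Mat         B;
$        PetscInt    i[] = {0,1,3};
$        PetscInt    j[] = {0,0,2};
$        PetscScalar v[] = {1.0,2.0,3.0};
$
$        MatCreate(PETSC_COMM_WORLD,&B);
$        MatSetSizes(B,2,PETSC_DECIDE,3,3);
$        MatSetType(B,MATMPIAIJ);
$        MatMPIAIJSetPreallocationCSR(B,i,j,v);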
3851 
3852 .keywords: matrix, aij, compressed row, sparse, parallel
3853 
3854 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3855           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3856 @*/
3857 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3858 {
3859   PetscErrorCode ierr;
3860 
3861   PetscFunctionBegin;
3862   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 
3866 /*@C
3867    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3868    (the default parallel PETSc format).  For good matrix assembly performance
3869    the user should preallocate the matrix storage by setting the parameters
3870    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3871    performance can be increased by more than a factor of 50.
3872 
3873    Collective on MPI_Comm
3874 
3875    Input Parameters:
3876 +  B - the matrix
3877 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3878            (same value is used for all local rows)
3879 .  d_nnz - array containing the number of nonzeros in the various rows of the
3880            DIAGONAL portion of the local submatrix (possibly different for each row)
3881            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3882            The size of this array is equal to the number of local rows, i.e 'm'.
3883            For matrices that will be factored, you must leave room for (and set)
3884            the diagonal entry even if it is zero.
3885 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3886            submatrix (same value is used for all local rows).
3887 -  o_nnz - array containing the number of nonzeros in the various rows of the
3888            OFF-DIAGONAL portion of the local submatrix (possibly different for
3889            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3890            structure. The size of this array is equal to the number
3891            of local rows, i.e 'm'.
3892 
3893    If the *_nnz parameter is given then the *_nz parameter is ignored
3894 
3895    The AIJ format (also called the Yale sparse matrix format or
3896    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3897    storage.  The stored row and column indices begin with zero.
3898    See Users-Manual: ch_mat for details.
3899 
3900    The parallel matrix is partitioned such that the first m0 rows belong to
3901    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3902    to process 2 etc., where m0,m1,m2,... are given by the input parameter 'm' on each process.
3903 
3904    The DIAGONAL portion of the local submatrix of a processor can be defined
3905    as the submatrix which is obtained by extracting the part corresponding to
3906    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3907    first row that belongs to the processor, r2 is the last row belonging to
3908    this processor, and c1-c2 is the range of indices of the local part of a
3909    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3910    common case of a square matrix, the row and column ranges are the same and
3911    the DIAGONAL part is also square. The remaining portion of the local
3912    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3913 
3914    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3915 
3916    You can call MatGetInfo() to get information on how effective the preallocation was;
3917    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3918    You can also run with the option -info and look for messages with the string
3919    malloc in them to see if additional memory allocation was needed.
3920 
3921    Example usage:
3922 
3923    Consider the following 8x8 matrix with 34 non-zero values, that is
3924    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3925    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3926    as follows:
3927 
3928 .vb
3929             1  2  0  |  0  3  0  |  0  4
3930     Proc0   0  5  6  |  7  0  0  |  8  0
3931             9  0 10  | 11  0  0  | 12  0
3932     -------------------------------------
3933            13  0 14  | 15 16 17  |  0  0
3934     Proc1   0 18  0  | 19 20 21  |  0  0
3935             0  0  0  | 22 23  0  | 24  0
3936     -------------------------------------
3937     Proc2  25 26 27  |  0  0 28  | 29  0
3938            30  0  0  | 31 32 33  |  0 34
3939 .ve
3940 
3941    This can be represented as a collection of submatrices as:
3942 
3943 .vb
3944       A B C
3945       D E F
3946       G H I
3947 .ve
3948 
3949    Where the submatrices A,B,C are owned by proc0, D,E,F are
3950    owned by proc1, G,H,I are owned by proc2.
3951 
3952    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3953    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3954    The 'M','N' parameters are 8,8, and have the same values on all procs.
3955 
3956    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3957    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3958    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3959    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3960    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3961    matrix, and [DF] as another SeqAIJ matrix.
3962 
3963    When d_nz, o_nz parameters are specified, d_nz storage elements are
3964    allocated for every row of the local diagonal submatrix, and o_nz
3965    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3966    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3967    local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3968    In this case, the values of d_nz,o_nz are:
3969 .vb
3970      proc0 : dnz = 2, o_nz = 2
3971      proc1 : dnz = 3, o_nz = 2
3972      proc2 : dnz = 1, o_nz = 4
3973 .ve
3974    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3975    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3976    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3977    34 values.
3978 
3979    When d_nnz, o_nnz parameters are specified, the storage is specified
3980    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3981    In the above case the values for d_nnz,o_nnz are:
3982 .vb
3983      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3984      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3985      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3986 .ve
3987    Here the space allocated is the sum of all the above values, i.e. 34, and
3988    hence the preallocation is perfect.
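
   As an illustrative sketch (rank 1 of the example above; error checking omitted),
   the per-row counts are supplied as follows, and the scalar d_nz/o_nz arguments
   are then ignored:

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve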
3989 
3990    Level: intermediate
3991 
3992 .keywords: matrix, aij, compressed row, sparse, parallel
3993 
3994 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3995           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3996 @*/
3997 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3998 {
3999   PetscErrorCode ierr;
4000 
4001   PetscFunctionBegin;
4002   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4003   PetscValidType(B,1);
4004   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4005   PetscFunctionReturn(0);
4006 }
4007 
4008 /*@
4009      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4010          CSR format the local rows.
4011 
4012    Collective on MPI_Comm
4013 
4014    Input Parameters:
4015 +  comm - MPI communicator
4016 .  m - number of local rows (Cannot be PETSC_DECIDE)
4017 .  n - This value should be the same as the local size used in creating the
4018        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4019        calculated if N is given) For square matrices n is almost always m.
4020 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4021 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4022 .   i - row indices
4023 .   j - column indices
4024 -   a - matrix values
4025 
4026    Output Parameter:
4027 .   mat - the matrix
4028 
4029    Level: intermediate
4030 
4031    Notes:
4032        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4033      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4034      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4035 
4036        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4037 
4038        The format used for the sparse matrix input is equivalent to a
4039     row-major ordering, i.e. for the following matrix, the input data expected is
4040     as shown
4041 
4042 $        1 0 0
4043 $        2 0 3     P0
4044 $       -------
4045 $        4 5 6     P1
4046 $
4047 $     Process0 [P0]: rows_owned=[0,1]
4048 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4049 $        j =  {0,0,2}  [size = 3]
4050 $        v =  {1,2,3}  [size = 3]
4051 $
4052 $     Process1 [P1]: rows_owned=[2]
4053 $        i =  {0,3}    [size = nrow+1  = 1+1]
4054 $        j =  {0,1,2}  [size = 3]
4055 $        v =  {4,5,6}  [size = 3]
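
   A minimal calling sketch for Process0 above (illustrative only; error checking
   omitted, and Process1 would pass m = 1 together with its own i, j, a arrays):

$        Mat         mat;
$        PetscInt    i[] = {0,1,3};
$        PetscInt    j[] = {0,0,2};
$        PetscScalar a[] = {1.0,2.0,3.0};
$
$        MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&mat);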
4056 
4057 .keywords: matrix, aij, compressed row, sparse, parallel
4058 
4059 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4060           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4061 @*/
4062 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4063 {
4064   PetscErrorCode ierr;
4065 
4066   PetscFunctionBegin;
4067   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4068   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4069   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4070   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4071   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4072   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4073   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4074   PetscFunctionReturn(0);
4075 }
4076 
4077 /*@C
4078    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4079    (the default parallel PETSc format).  For good matrix assembly performance
4080    the user should preallocate the matrix storage by setting the parameters
4081    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4082    performance can be increased by more than a factor of 50.
4083 
4084    Collective on MPI_Comm
4085 
4086    Input Parameters:
4087 +  comm - MPI communicator
4088 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4089            This value should be the same as the local size used in creating the
4090            y vector for the matrix-vector product y = Ax.
4091 .  n - This value should be the same as the local size used in creating the
4092        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4093        calculated if N is given) For square matrices n is almost always m.
4094 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4095 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4096 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4097            (same value is used for all local rows)
4098 .  d_nnz - array containing the number of nonzeros in the various rows of the
4099            DIAGONAL portion of the local submatrix (possibly different for each row)
4100            or NULL, if d_nz is used to specify the nonzero structure.
4101            The size of this array is equal to the number of local rows, i.e 'm'.
4102 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4103            submatrix (same value is used for all local rows).
4104 -  o_nnz - array containing the number of nonzeros in the various rows of the
4105            OFF-DIAGONAL portion of the local submatrix (possibly different for
4106            each row) or NULL, if o_nz is used to specify the nonzero
4107            structure. The size of this array is equal to the number
4108            of local rows, i.e 'm'.
4109 
4110    Output Parameter:
4111 .  A - the matrix
4112 
4113    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4114    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4115    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4116 
4117    Notes:
4118    If the *_nnz parameter is given then the *_nz parameter is ignored
4119 
4120    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4121    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4122    storage requirements for this matrix.
4123 
4124    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4125    processor then it must be used on all processors that share the object for
4126    that argument.
4127 
4128    The user MUST specify either the local or global matrix dimensions
4129    (possibly both).
4130 
4131    The parallel matrix is partitioned across processors such that the
4132    first m0 rows belong to process 0, the next m1 rows belong to
4133    process 1, the next m2 rows belong to process 2 etc.. where
4134    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4135    values corresponding to an [m x N] submatrix.
4136 
4137    The columns are logically partitioned with the n0 columns belonging
4138    to 0th partition, the next n1 columns belonging to the next
4139    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4140 
4141    The DIAGONAL portion of the local submatrix on any given processor
4142    is the submatrix corresponding to the rows and columns m,n
4143    owned by the given processor, i.e. the diagonal matrix on
4144    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4145    etc. The remaining portion of the local submatrix [m x (N-n)]
4146    constitutes the OFF-DIAGONAL portion. The example below better
4147    illustrates this concept.
4148 
4149    For a square global matrix we define each processor's diagonal portion
4150    to be its local rows and the corresponding columns (a square submatrix);
4151    each processor's off-diagonal portion encompasses the remainder of the
4152    local matrix (a rectangular submatrix).
4153 
4154    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4155 
4156    When calling this routine with a single process communicator, a matrix of
4157    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
4158    type of communicator, use the construction mechanism:
4159      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4160 
4161    By default, this format uses inodes (identical nodes) when possible.
4162    We search for consecutive rows with the same nonzero structure, thereby
4163    reusing matrix information to achieve increased efficiency.
4164 
4165    Options Database Keys:
4166 +  -mat_no_inode  - Do not use inodes
4167 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4168 -  -mat_aij_oneindex - Internally use indexing starting at 1
4169         rather than 0.  Note that when calling MatSetValues(),
4170         the user still MUST index entries starting at 0!
4171 
4172 
4173    Example usage:
4174 
4175    Consider the following 8x8 matrix with 34 non-zero values, that is
4176    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4177    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4178    as follows:
4179 
4180 .vb
4181             1  2  0  |  0  3  0  |  0  4
4182     Proc0   0  5  6  |  7  0  0  |  8  0
4183             9  0 10  | 11  0  0  | 12  0
4184     -------------------------------------
4185            13  0 14  | 15 16 17  |  0  0
4186     Proc1   0 18  0  | 19 20 21  |  0  0
4187             0  0  0  | 22 23  0  | 24  0
4188     -------------------------------------
4189     Proc2  25 26 27  |  0  0 28  | 29  0
4190            30  0  0  | 31 32 33  |  0 34
4191 .ve
4192 
4193    This can be represented as a collection of submatrices as:
4194 
4195 .vb
4196       A B C
4197       D E F
4198       G H I
4199 .ve
4200 
4201    Where the submatrices A,B,C are owned by proc0, D,E,F are
4202    owned by proc1, G,H,I are owned by proc2.
4203 
4204    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4205    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4206    The 'M','N' parameters are 8,8, and have the same values on all procs.
4207 
4208    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4209    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4210    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4211    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4212    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4213    matrix, and [DF] as another SeqAIJ matrix.
4214 
4215    When d_nz, o_nz parameters are specified, d_nz storage elements are
4216    allocated for every row of the local diagonal submatrix, and o_nz
4217    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4218    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4219    local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4220    In this case, the values of d_nz,o_nz are:
4221 .vb
4222      proc0 : dnz = 2, o_nz = 2
4223      proc1 : dnz = 3, o_nz = 2
4224      proc2 : dnz = 1, o_nz = 4
4225 .ve
4226    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4227    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4228    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4229    34 values.
4230 
4231    When d_nnz, o_nnz parameters are specified, the storage is specified
4232    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4233    In the above case the values for d_nnz,o_nnz are:
4234 .vb
4235      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4236      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4237      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4238 .ve
4239    Here the space allocated is the sum of all the above values, i.e. 34, and
4240    hence the preallocation is perfect.
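
   A sketch of the corresponding call on rank 1 of the example (illustrative
   values, error checking omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve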
4241 
4242    Level: intermediate
4243 
4244 .keywords: matrix, aij, compressed row, sparse, parallel
4245 
4246 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4247           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4248 @*/
4249 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4250 {
4251   PetscErrorCode ierr;
4252   PetscMPIInt    size;
4253 
4254   PetscFunctionBegin;
4255   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4256   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4257   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4258   if (size > 1) {
4259     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4260     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4261   } else {
4262     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4263     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4264   }
4265   PetscFunctionReturn(0);
4266 }
4267 
4268 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4269 {
4270   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4271   PetscBool      flg;
4272   PetscErrorCode ierr;
4273 
4274   PetscFunctionBegin;
4275   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4276   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4277   if (Ad)     *Ad     = a->A;
4278   if (Ao)     *Ao     = a->B;
4279   if (colmap) *colmap = a->garray;
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4284 {
4285   PetscErrorCode ierr;
4286   PetscInt       m,N,i,rstart,nnz,Ii;
4287   PetscInt       *indx;
4288   PetscScalar    *values;
4289 
4290   PetscFunctionBegin;
4291   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4292   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4293     PetscInt       *dnz,*onz,sum,bs,cbs;
4294 
4295     if (n == PETSC_DECIDE) {
4296       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4297     }
4298     /* Check sum(n) = N */
4299     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4300     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4301 
4302     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4303     rstart -= m;
4304 
4305     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4306     for (i=0; i<m; i++) {
4307       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4308       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4309       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4310     }
4311 
4312     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4313     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4314     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4315     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4316     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4317     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4318     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4319     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4320   }
4321 
4322   /* numeric phase */
4323   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4324   for (i=0; i<m; i++) {
4325     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4326     Ii   = i + rstart;
4327     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4328     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4329   }
4330   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4331   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4332   PetscFunctionReturn(0);
4333 }
4334 
4335 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4336 {
4337   PetscErrorCode    ierr;
4338   PetscMPIInt       rank;
4339   PetscInt          m,N,i,rstart,nnz;
4340   size_t            len;
4341   const PetscInt    *indx;
4342   PetscViewer       out;
4343   char              *name;
4344   Mat               B;
4345   const PetscScalar *values;
4346 
4347   PetscFunctionBegin;
4348   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4349   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4350   /* Should this be the type of the diagonal block of A? */
4351   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4352   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4353   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4354   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4355   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4356   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4357   for (i=0; i<m; i++) {
4358     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4359     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4360     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4361   }
4362   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4363   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4364 
4365   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4366   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4367   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4368   sprintf(name,"%s.%d",outfile,rank);
4369   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4370   ierr = PetscFree(name);CHKERRQ(ierr);
4371   ierr = MatView(B,out);CHKERRQ(ierr);
4372   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4373   ierr = MatDestroy(&B);CHKERRQ(ierr);
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4378 {
4379   PetscErrorCode      ierr;
4380   Mat_Merge_SeqsToMPI *merge;
4381   PetscContainer      container;
4382 
4383   PetscFunctionBegin;
4384   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4385   if (container) {
4386     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4387     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4388     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4389     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4390     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4391     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4392     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4393     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4395     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4396     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4397     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4398     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4399     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4400     ierr = PetscFree(merge);CHKERRQ(ierr);
4401     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4402   }
4403   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 #include <../src/mat/utils/freespace.h>
4408 #include <petscbt.h>
4409 
4410 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4411 {
4412   PetscErrorCode      ierr;
4413   MPI_Comm            comm;
4414   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4415   PetscMPIInt         size,rank,taga,*len_s;
4416   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4417   PetscInt            proc,m;
4418   PetscInt            **buf_ri,**buf_rj;
4419   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4420   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4421   MPI_Request         *s_waits,*r_waits;
4422   MPI_Status          *status;
4423   MatScalar           *aa=a->a;
4424   MatScalar           **abuf_r,*ba_i;
4425   Mat_Merge_SeqsToMPI *merge;
4426   PetscContainer      container;
4427 
4428   PetscFunctionBegin;
4429   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4430   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4431 
4432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4433   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4434 
4435   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4436   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4437 
4438   bi     = merge->bi;
4439   bj     = merge->bj;
4440   buf_ri = merge->buf_ri;
4441   buf_rj = merge->buf_rj;
4442 
4443   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4444   owners = merge->rowmap->range;
4445   len_s  = merge->len_s;
4446 
4447   /* send and recv matrix values */
4448   /*-----------------------------*/
4449   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4450   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4451 
4452   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4453   for (proc=0,k=0; proc<size; proc++) {
4454     if (!len_s[proc]) continue;
4455     i    = owners[proc];
4456     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4457     k++;
4458   }
4459 
4460   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4461   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4462   ierr = PetscFree(status);CHKERRQ(ierr);
4463 
4464   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4465   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4466 
4467   /* insert mat values of mpimat */
4468   /*----------------------------*/
4469   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4470   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4471 
4472   for (k=0; k<merge->nrecv; k++) {
4473     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4474     nrows       = *(buf_ri_k[k]);
4475     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4476     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4477   }
4478 
4479   /* set values of ba */
4480   m = merge->rowmap->n;
4481   for (i=0; i<m; i++) {
4482     arow = owners[rank] + i;
4483     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4484     bnzi = bi[i+1] - bi[i];
4485     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4486 
4487     /* add local non-zero vals of this proc's seqmat into ba */
4488     anzi   = ai[arow+1] - ai[arow];
4489     aj     = a->j + ai[arow];
4490     aa     = a->a + ai[arow];
4491     nextaj = 0;
4492     for (j=0; nextaj<anzi; j++) {
4493       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4494         ba_i[j] += aa[nextaj++];
4495       }
4496     }
4497 
4498     /* add received vals into ba */
4499     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4500       /* i-th row */
4501       if (i == *nextrow[k]) {
4502         anzi   = *(nextai[k]+1) - *nextai[k];
4503         aj     = buf_rj[k] + *(nextai[k]);
4504         aa     = abuf_r[k] + *(nextai[k]);
4505         nextaj = 0;
4506         for (j=0; nextaj<anzi; j++) {
4507           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4508             ba_i[j] += aa[nextaj++];
4509           }
4510         }
4511         nextrow[k]++; nextai[k]++;
4512       }
4513     }
4514     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4515   }
4516   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4517   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518 
4519   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4520   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4521   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4522   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4523   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4528 {
4529   PetscErrorCode      ierr;
4530   Mat                 B_mpi;
4531   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4532   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4533   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4534   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4535   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4536   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4537   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4538   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4539   MPI_Status          *status;
4540   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4541   PetscBT             lnkbt;
4542   Mat_Merge_SeqsToMPI *merge;
4543   PetscContainer      container;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4547 
4548   /* make sure it is a PETSc comm */
4549   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4551   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4552 
4553   ierr = PetscNew(&merge);CHKERRQ(ierr);
4554   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4555 
4556   /* determine row ownership */
4557   /*---------------------------------------------------------*/
4558   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4559   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4560   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4561   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4562   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4563   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4564   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4565 
4566   m      = merge->rowmap->n;
4567   owners = merge->rowmap->range;
4568 
4569   /* determine the number of messages to send, their lengths */
4570   /*---------------------------------------------------------*/
4571   len_s = merge->len_s;
4572 
4573   len          = 0; /* length of buf_si[] */
4574   merge->nsend = 0;
4575   for (proc=0; proc<size; proc++) {
4576     len_si[proc] = 0;
4577     if (proc == rank) {
4578       len_s[proc] = 0;
4579     } else {
4580       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4581       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4582     }
4583     if (len_s[proc]) {
4584       merge->nsend++;
4585       nrows = 0;
4586       for (i=owners[proc]; i<owners[proc+1]; i++) {
4587         if (ai[i+1] > ai[i]) nrows++;
4588       }
4589       len_si[proc] = 2*(nrows+1);
4590       len         += len_si[proc];
4591     }
4592   }
4593 
4594   /* determine the number and length of messages to receive for ij-structure */
4595   /*-------------------------------------------------------------------------*/
4596   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4597   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4598 
4599   /* post the Irecv of j-structure */
4600   /*-------------------------------*/
4601   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4602   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4603 
4604   /* post the Isend of j-structure */
4605   /*--------------------------------*/
4606   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4607 
4608   for (proc=0, k=0; proc<size; proc++) {
4609     if (!len_s[proc]) continue;
4610     i    = owners[proc];
4611     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4612     k++;
4613   }
4614 
4615   /* receives and sends of j-structure are complete */
4616   /*------------------------------------------------*/
4617   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4618   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4619 
4620   /* send and recv i-structure */
4621   /*---------------------------*/
4622   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4623   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4624 
4625   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4626   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4627   for (proc=0,k=0; proc<size; proc++) {
4628     if (!len_s[proc]) continue;
4629     /* form outgoing message for i-structure:
4630          buf_si[0]:                 nrows to be sent
4631                [1:nrows]:           row index (global)
4632                [nrows+1:2*nrows+1]: i-structure index
4633     */
4634     /*-------------------------------------------*/
4635     nrows       = len_si[proc]/2 - 1;
4636     buf_si_i    = buf_si + nrows+1;
4637     buf_si[0]   = nrows;
4638     buf_si_i[0] = 0;
4639     nrows       = 0;
4640     for (i=owners[proc]; i<owners[proc+1]; i++) {
4641       anzi = ai[i+1] - ai[i];
4642       if (anzi) {
4643         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4644         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4645         nrows++;
4646       }
4647     }
4648     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4649     k++;
4650     buf_si += len_si[proc];
4651   }
4652 
4653   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4654   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4655 
4656   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4657   for (i=0; i<merge->nrecv; i++) {
4658     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4659   }
4660 
4661   ierr = PetscFree(len_si);CHKERRQ(ierr);
4662   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4663   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4664   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4665   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4666   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4667   ierr = PetscFree(status);CHKERRQ(ierr);
4668 
4669   /* compute a local seq matrix in each processor */
4670   /*----------------------------------------------*/
4671   /* allocate bi array and free space for accumulating nonzero column info */
4672   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4673   bi[0] = 0;
4674 
4675   /* create and initialize a linked list */
4676   nlnk = N+1;
4677   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4678 
4679   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4680   len  = ai[owners[rank+1]] - ai[owners[rank]];
4681   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4682 
4683   current_space = free_space;
4684 
4685   /* determine symbolic info for each local row */
4686   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4687 
4688   for (k=0; k<merge->nrecv; k++) {
4689     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4690     nrows       = *buf_ri_k[k];
4691     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4692     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4693   }
4694 
4695   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4696   len  = 0;
4697   for (i=0; i<m; i++) {
4698     bnzi = 0;
4699     /* add local non-zero cols of this proc's seqmat into lnk */
4700     arow  = owners[rank] + i;
4701     anzi  = ai[arow+1] - ai[arow];
4702     aj    = a->j + ai[arow];
4703     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4704     bnzi += nlnk;
4705     /* add received col data into lnk */
4706     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4707       if (i == *nextrow[k]) { /* i-th row */
4708         anzi  = *(nextai[k]+1) - *nextai[k];
4709         aj    = buf_rj[k] + *nextai[k];
4710         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4711         bnzi += nlnk;
4712         nextrow[k]++; nextai[k]++;
4713       }
4714     }
4715     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4716 
4717     /* if free space is not available, make more free space */
4718     if (current_space->local_remaining<bnzi) {
4719       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4720       nspacedouble++;
4721     }
4722     /* copy data into free space, then initialize lnk */
4723     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4724     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4725 
4726     current_space->array           += bnzi;
4727     current_space->local_used      += bnzi;
4728     current_space->local_remaining -= bnzi;
4729 
4730     bi[i+1] = bi[i] + bnzi;
4731   }
4732 
4733   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4734 
4735   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4736   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4737   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4738 
4739   /* create symbolic parallel matrix B_mpi */
4740   /*---------------------------------------*/
4741   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4742   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4743   if (n==PETSC_DECIDE) {
4744     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4745   } else {
4746     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4747   }
4748   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4749   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4750   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4751   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4752   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4753 
4754   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4755   B_mpi->assembled    = PETSC_FALSE;
4756   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4757   merge->bi           = bi;
4758   merge->bj           = bj;
4759   merge->buf_ri       = buf_ri;
4760   merge->buf_rj       = buf_rj;
4761   merge->coi          = NULL;
4762   merge->coj          = NULL;
4763   merge->owners_co    = NULL;
4764 
4765   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4766 
4767   /* attach the supporting struct to B_mpi for reuse */
4768   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4769   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4770   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4771   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4772   *mpimat = B_mpi;
4773 
4774   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4775   PetscFunctionReturn(0);
4776 }
4777 
4778 /*@C
4779       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4780                  matrices from each processor
4781 
4782     Collective on MPI_Comm
4783 
4784    Input Parameters:
4785 +    comm - the communicator the parallel matrix will live on
4786 .    seqmat - the input sequential matrix on each processor
4787 .    m - number of local rows (or PETSC_DECIDE)
4788 .    n - number of local columns (or PETSC_DECIDE)
4789 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4790 
4791    Output Parameter:
4792 .    mpimat - the parallel matrix generated
4793 
4794     Level: advanced
4795 
4796    Notes:
4797      The dimensions of the sequential matrix in each processor MUST be the same.
4798      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4799      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
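
     A minimal usage sketch (assumes every rank has already built a sequential
     matrix seqmat of the same global size; error checking omitted):

.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve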
4800 @*/
4801 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4802 {
4803   PetscErrorCode ierr;
4804   PetscMPIInt    size;
4805 
4806   PetscFunctionBegin;
4807   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4808   if (size == 1) {
4809     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4810     if (scall == MAT_INITIAL_MATRIX) {
4811       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4812     } else {
4813       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4814     }
4815     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4816     PetscFunctionReturn(0);
4817   }
4818   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4819   if (scall == MAT_INITIAL_MATRIX) {
4820     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4821   }
4822   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4823   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4824   PetscFunctionReturn(0);
4825 }
4826 
4827 /*@
4828      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4829           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4830           with MatGetSize()
4831 
4832     Not Collective
4833 
4834    Input Parameters:
4835 +    A - the matrix
4836 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4837 
4838    Output Parameter:
4839 .    A_loc - the local sequential matrix generated
4840 
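   A usage sketch (error checking omitted; here the returned A_loc is destroyed by
   the caller when no longer needed):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
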
4841     Level: developer
4842 
4843 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4844 
4845 @*/
4846 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4847 {
4848   PetscErrorCode ierr;
4849   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4850   Mat_SeqAIJ     *mat,*a,*b;
4851   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4852   MatScalar      *aa,*ba,*cam;
4853   PetscScalar    *ca;
4854   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4855   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4856   PetscBool      match;
4857   MPI_Comm       comm;
4858   PetscMPIInt    size;
4859 
4860   PetscFunctionBegin;
4861   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4862   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4863   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4864   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4865   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4866 
4867   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4868   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4869   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4870   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4871   aa = a->a; ba = b->a;
4872   if (scall == MAT_INITIAL_MATRIX) {
4873     if (size == 1) {
4874       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4875       PetscFunctionReturn(0);
4876     }
4877 
4878     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4879     ci[0] = 0;
4880     for (i=0; i<am; i++) {
4881       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4882     }
4883     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4884     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4885     k    = 0;
4886     for (i=0; i<am; i++) {
4887       ncols_o = bi[i+1] - bi[i];
4888       ncols_d = ai[i+1] - ai[i];
4889       /* off-diagonal portion of A */
4890       for (jo=0; jo<ncols_o; jo++) {
4891         col = cmap[*bj];
4892         if (col >= cstart) break;
4893         cj[k]   = col; bj++;
4894         ca[k++] = *ba++;
4895       }
4896       /* diagonal portion of A */
4897       for (j=0; j<ncols_d; j++) {
4898         cj[k]   = cstart + *aj++;
4899         ca[k++] = *aa++;
4900       }
4901       /* off-diagonal portion of A */
4902       for (j=jo; j<ncols_o; j++) {
4903         cj[k]   = cmap[*bj++];
4904         ca[k++] = *ba++;
4905       }
4906     }
4907     /* put together the new matrix */
4908     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4909     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4910     /* Since these are PETSc arrays, change flags to free them as necessary. */
4911     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4912     mat->free_a  = PETSC_TRUE;
4913     mat->free_ij = PETSC_TRUE;
4914     mat->nonew   = 0;
4915   } else if (scall == MAT_REUSE_MATRIX) {
4916     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4917     ci = mat->i; cj = mat->j; cam = mat->a;
4918     for (i=0; i<am; i++) {
4919       /* off-diagonal portion of A */
4920       ncols_o = bi[i+1] - bi[i];
4921       for (jo=0; jo<ncols_o; jo++) {
4922         col = cmap[*bj];
4923         if (col >= cstart) break;
4924         *cam++ = *ba++; bj++;
4925       }
4926       /* diagonal portion of A */
4927       ncols_d = ai[i+1] - ai[i];
4928       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4929       /* off-diagonal portion of A */
4930       for (j=jo; j<ncols_o; j++) {
4931         *cam++ = *ba++; bj++;
4932       }
4933     }
4934   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4935   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4936   PetscFunctionReturn(0);
4937 }
4938 
4939 /*@C
4940      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4941 
4942     Not Collective
4943 
4944    Input Parameters:
4945 +    A - the matrix
4946 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 -    row, col - index sets of rows and columns to extract (or NULL)
4948 
4949    Output Parameter:
4950 .    A_loc - the local sequential matrix generated
4951 
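   A usage sketch extracting all local rows and all nonzero columns (row and col
   passed as NULL; error checking omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
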
4952     Level: developer
4953 
4954 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4955 
4956 @*/
4957 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4958 {
4959   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4960   PetscErrorCode ierr;
4961   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4962   IS             isrowa,iscola;
4963   Mat            *aloc;
4964   PetscBool      match;
4965 
4966   PetscFunctionBegin;
4967   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4968   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4969   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4970   if (!row) {
4971     start = A->rmap->rstart; end = A->rmap->rend;
4972     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4973   } else {
4974     isrowa = *row;
4975   }
4976   if (!col) {
4977     start = A->cmap->rstart;
4978     cmap  = a->garray;
4979     nzA   = a->A->cmap->n;
4980     nzB   = a->B->cmap->n;
4981     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4982     ncols = 0;
4983     for (i=0; i<nzB; i++) {
4984       if (cmap[i] < start) idx[ncols++] = cmap[i];
4985       else break;
4986     }
4987     imark = i;
4988     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4989     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4990     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4991   } else {
4992     iscola = *col;
4993   }
4994   if (scall != MAT_INITIAL_MATRIX) {
4995     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4996     aloc[0] = *A_loc;
4997   }
4998   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4999   *A_loc = aloc[0];
5000   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5001   if (!row) {
5002     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5003   }
5004   if (!col) {
5005     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5006   }
5007   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5008   PetscFunctionReturn(0);
5009 }
5010 
5011 /*@C
5012     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5013 
5014     Collective on Mat
5015 
5016    Input Parameters:
5017 +    A,B - the matrices in mpiaij format
5018 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5019 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5020 
5021    Output Parameter:
5022 +    rowb, colb - index sets of rows and columns of B to extract
5023 -    B_seq - the sequential matrix generated
5024 
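   A usage sketch (rowb and colb passed as NULL, so the index sets are created and
   destroyed internally; error checking omitted):

.vb
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,NULL,NULL,&B_seq);
.ve
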
5025     Level: developer
5026 
5027 @*/
5028 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5029 {
5030   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5031   PetscErrorCode ierr;
5032   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5033   IS             isrowb,iscolb;
5034   Mat            *bseq=NULL;
5035 
5036   PetscFunctionBegin;
5037   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5038     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5039   }
5040   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5041 
5042   if (scall == MAT_INITIAL_MATRIX) {
5043     start = A->cmap->rstart;
5044     cmap  = a->garray;
5045     nzA   = a->A->cmap->n;
5046     nzB   = a->B->cmap->n;
5047     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5048     ncols = 0;
5049     for (i=0; i<nzB; i++) {  /* row < local row index */
5050       if (cmap[i] < start) idx[ncols++] = cmap[i];
5051       else break;
5052     }
5053     imark = i;
5054     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5055     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5056     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5057     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5058   } else {
5059     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5060     isrowb  = *rowb; iscolb = *colb;
5061     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5062     bseq[0] = *B_seq;
5063   }
5064   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5065   *B_seq = bseq[0];
5066   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5067   if (!rowb) {
5068     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5069   } else {
5070     *rowb = isrowb;
5071   }
5072   if (!colb) {
5073     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5074   } else {
5075     *colb = iscolb;
5076   }
5077   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5078   PetscFunctionReturn(0);
5079 }
5080 
5081 /*
5082     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5083     of the OFF-DIAGONAL portion of local A
5084 
5085     Collective on Mat
5086 
5087    Input Parameters:
5088 +    A,B - the matrices in mpiaij format
5089 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5090 
5091    Output Parameter:
5092 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5093 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5094 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5095 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5096 
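   A sketch of the typical call pattern (setup of A and B and error handling omitted; the reuse buffers
   returned by the first call are passed back unchanged when only the numerical values of B have changed):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
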
5097     Level: developer
5098 
5099 */
5100 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5101 {
5102   VecScatter_MPI_General *gen_to,*gen_from;
5103   PetscErrorCode         ierr;
5104   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5105   Mat_SeqAIJ             *b_oth;
5106   VecScatter             ctx =a->Mvctx;
5107   MPI_Comm               comm;
5108   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5109   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5110   PetscInt               *rvalues,*svalues;
5111   MatScalar              *b_otha,*bufa,*bufA;
5112   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5113   MPI_Request            *rwaits = NULL,*swaits = NULL;
5114   MPI_Status             *sstatus,rstatus;
5115   PetscMPIInt            jj,size;
5116   PetscInt               *cols,sbs,rbs;
5117   PetscScalar            *vals;
5118 
5119   PetscFunctionBegin;
5120   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5121   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5122 
5123   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5124     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5125   }
5126   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5127   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5128 
5129   if (size == 1) {
5130     startsj_s = NULL;
5131     bufa_ptr  = NULL;
5132     *B_oth    = NULL;
5133     PetscFunctionReturn(0);
5134   }
5135 
5136   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5137   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5138   nrecvs   = gen_from->n;
5139   nsends   = gen_to->n;
5140 
5141   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5142   srow    = gen_to->indices;    /* local row index to be sent */
5143   sstarts = gen_to->starts;
5144   sprocs  = gen_to->procs;
5145   sstatus = gen_to->sstatus;
5146   sbs     = gen_to->bs;
5147   rstarts = gen_from->starts;
5148   rprocs  = gen_from->procs;
5149   rbs     = gen_from->bs;
5150 
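  /* without caller-supplied reuse buffers there is no saved communication pattern, so rebuild from scratch */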
5151   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5152   if (scall == MAT_INITIAL_MATRIX) {
5153     /* i-array */
5154     /*---------*/
5155     /*  post receives */
5156     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5157     for (i=0; i<nrecvs; i++) {
5158       rowlen = rvalues + rstarts[i]*rbs;
5159       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5160       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5161     }
5162 
5163     /* pack the outgoing message */
5164     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5165 
5166     sstartsj[0] = 0;
5167     rstartsj[0] = 0;
5168     len         = 0; /* total length of j or a array to be sent */
5169     k           = 0;
5170     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5171     for (i=0; i<nsends; i++) {
5172       rowlen = svalues + sstarts[i]*sbs;
5173       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5174       for (j=0; j<nrows; j++) {
5175         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5176         for (l=0; l<sbs; l++) {
5177           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5178 
5179           rowlen[j*sbs+l] = ncols;
5180 
5181           len += ncols;
5182           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5183         }
5184         k++;
5185       }
5186       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5187 
5188       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5189     }
5190     /* recvs and sends of i-array are completed */
5191     i = nrecvs;
5192     while (i--) {
5193       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5194     }
5195     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5196     ierr = PetscFree(svalues);CHKERRQ(ierr);
5197 
5198     /* allocate buffers for sending j and a arrays */
5199     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5200     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5201 
5202     /* create i-array of B_oth */
5203     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5204 
5205     b_othi[0] = 0;
5206     len       = 0; /* total length of j or a array to be received */
5207     k         = 0;
5208     for (i=0; i<nrecvs; i++) {
5209       rowlen = rvalues + rstarts[i]*rbs;
5210       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5211       for (j=0; j<nrows; j++) {
5212         b_othi[k+1] = b_othi[k] + rowlen[j];
5213         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5214         k++;
5215       }
5216       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5217     }
5218     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5219 
5220     /* allocate space for j and a arrays of B_oth */
5221     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5222     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5223 
5224     /* j-array */
5225     /*---------*/
5226     /*  post receives of j-array */
5227     for (i=0; i<nrecvs; i++) {
5228       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5229       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5230     }
5231 
5232     /* pack the outgoing message j-array */
5233     k = 0;
5234     for (i=0; i<nsends; i++) {
5235       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5236       bufJ  = bufj+sstartsj[i];
5237       for (j=0; j<nrows; j++) {
5238         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5239         for (ll=0; ll<sbs; ll++) {
5240           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5241           for (l=0; l<ncols; l++) {
5242             *bufJ++ = cols[l];
5243           }
5244           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5245         }
5246       }
5247       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5248     }
5249 
5250     /* recvs and sends of j-array are completed */
5251     i = nrecvs;
5252     while (i--) {
5253       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5254     }
5255     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5256   } else if (scall == MAT_REUSE_MATRIX) {
5257     sstartsj = *startsj_s;
5258     rstartsj = *startsj_r;
5259     bufa     = *bufa_ptr;
5260     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5261     b_otha   = b_oth->a;
5262   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5263 
5264   /* a-array */
5265   /*---------*/
5266   /*  post receives of a-array */
5267   for (i=0; i<nrecvs; i++) {
5268     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5269     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5270   }
5271 
5272   /* pack the outgoing message a-array */
5273   k = 0;
5274   for (i=0; i<nsends; i++) {
5275     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5276     bufA  = bufa+sstartsj[i];
5277     for (j=0; j<nrows; j++) {
5278       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5279       for (ll=0; ll<sbs; ll++) {
5280         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5281         for (l=0; l<ncols; l++) {
5282           *bufA++ = vals[l];
5283         }
5284         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5285       }
5286     }
5287     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5288   }
5289   /* recvs and sends of a-array are completed */
5290   i = nrecvs;
5291   while (i--) {
5292     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5293   }
5294   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5295   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5296 
5297   if (scall == MAT_INITIAL_MATRIX) {
5298     /* put together the new matrix */
5299     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5300 
5301     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5302     /* Since these are PETSc arrays, change flags to free them as necessary. */
5303     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5304     b_oth->free_a  = PETSC_TRUE;
5305     b_oth->free_ij = PETSC_TRUE;
5306     b_oth->nonew   = 0;
5307 
5308     ierr = PetscFree(bufj);CHKERRQ(ierr);
5309     if (!startsj_s || !bufa_ptr) {
5310       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5311       ierr = PetscFree(bufa);CHKERRQ(ierr);
5312     } else {
5313       *startsj_s = sstartsj;
5314       *startsj_r = rstartsj;
5315       *bufa_ptr  = bufa;
5316     }
5317   }
5318   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5319   PetscFunctionReturn(0);
5320 }
5321 
5322 /*@C
5323   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5324 
5325   Not Collective
5326 
5327   Input Parameter:
5328 . A - The matrix in mpiaij format
5329 
5330   Output Parameters:
5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5332 . colmap - A map from global column index to local index into lvec
5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5334 
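  Example Usage:
  A minimal sketch (it assumes A is an assembled MATMPIAIJ matrix; note that the type of the colmap
  argument depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
    Vec        lvec;
    VecScatter mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
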
5335   Level: developer
5336 
5337 @*/
5338 #if defined(PETSC_USE_CTABLE)
5339 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5340 #else
5341 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5342 #endif
5343 {
5344   Mat_MPIAIJ *a;
5345 
5346   PetscFunctionBegin;
5347   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5348   PetscValidPointer(lvec, 2);
5349   PetscValidPointer(colmap, 3);
5350   PetscValidPointer(multScatter, 4);
5351   a = (Mat_MPIAIJ*) A->data;
5352   if (lvec) *lvec = a->lvec;
5353   if (colmap) *colmap = a->colmap;
5354   if (multScatter) *multScatter = a->Mvctx;
5355   PetscFunctionReturn(0);
5356 }
5357 
5358 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5359 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5360 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5361 #if defined(PETSC_HAVE_ELEMENTAL)
5362 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5363 #endif
5364 #if defined(PETSC_HAVE_HYPRE)
5365 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5366 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5367 #endif
5368 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5369 
5370 /*
5371     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5372 
5373                n                       p                          p
5374         (              )       (              )         (                  )
5375       m (      A       )  *  n (       B      )   =   m (         C        )
5376         (              )       (              )         (                  )
5377 
5378 */
5379 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5380 {
5381   PetscErrorCode ierr;
5382   Mat            At,Bt,Ct;
5383 
5384   PetscFunctionBegin;
5385   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5386   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5387   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5388   ierr = MatDestroy(&At);CHKERRQ(ierr);
5389   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5390   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5391   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5392   PetscFunctionReturn(0);
5393 }
5394 
5395 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5396 {
5397   PetscErrorCode ierr;
5398   PetscInt       m=A->rmap->n,n=B->cmap->n;
5399   Mat            Cmat;
5400 
5401   PetscFunctionBegin;
5402   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5403   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5404   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5405   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5406   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5407   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5408   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5409   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5410 
5411   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5412 
5413   *C = Cmat;
5414   PetscFunctionReturn(0);
5415 }
5416 
5417 /* ----------------------------------------------------------------*/
5418 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5419 {
5420   PetscErrorCode ierr;
5421 
5422   PetscFunctionBegin;
5423   if (scall == MAT_INITIAL_MATRIX) {
5424     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5425     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5426     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5427   }
5428   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5429   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5430   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5431   PetscFunctionReturn(0);
5432 }
5433 
5434 /*MC
5435    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5436 
5437    Options Database Keys:
5438 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5439 
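  Example Usage:
  A minimal sketch of creating an MPIAIJ matrix explicitly, then inserting entries with MatSetValues()
  (the global size 100 and the preallocation values shown are illustrative only):
.vb
    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
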
5440   Level: beginner
5441 
5442 .seealso: MatCreateAIJ()
5443 M*/
5444 
5445 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5446 {
5447   Mat_MPIAIJ     *b;
5448   PetscErrorCode ierr;
5449   PetscMPIInt    size;
5450 
5451   PetscFunctionBegin;
5452   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5453 
5454   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5455   B->data       = (void*)b;
5456   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5457   B->assembled  = PETSC_FALSE;
5458   B->insertmode = NOT_SET_VALUES;
5459   b->size       = size;
5460 
5461   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5462 
5463   /* build cache for off array entries formed */
5464   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5465 
5466   b->donotstash  = PETSC_FALSE;
5467   b->colmap      = 0;
5468   b->garray      = 0;
5469   b->roworiented = PETSC_TRUE;
5470 
5471   /* stuff used for matrix vector multiply */
5472   b->lvec  = NULL;
5473   b->Mvctx = NULL;
5474 
5475   /* stuff for MatGetRow() */
5476   b->rowindices   = 0;
5477   b->rowvalues    = 0;
5478   b->getrowactive = PETSC_FALSE;
5479 
5480   /* flexible pointer used in CUSP/CUSPARSE classes */
5481   b->spptr = NULL;
5482 
5483   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5484   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5485   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5486   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5487   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5488   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5489   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5490   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5491   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5492   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5493 #if defined(PETSC_HAVE_ELEMENTAL)
5494   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5495 #endif
5496 #if defined(PETSC_HAVE_HYPRE)
5497   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5498 #endif
5499   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5500   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5501   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5502   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5503 #if defined(PETSC_HAVE_HYPRE)
5504   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5505 #endif
5506   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5507   PetscFunctionReturn(0);
5508 }
5509 
5510 /*@C
5511      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5512          and "off-diagonal" part of the matrix in CSR format.
5513 
5514    Collective on MPI_Comm
5515 
5516    Input Parameters:
5517 +  comm - MPI communicator
5518 .  m - number of local rows (Cannot be PETSC_DECIDE)
5519 .  n - This value should be the same as the local size used in creating the
5520        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5521        calculated if N is given). For square matrices n is almost always m.
5522 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5523 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5524 .   i - row indices for "diagonal" portion of matrix
5525 .   j - column indices
5526 .   a - matrix values
5527 .   oi - row indices for "off-diagonal" portion of matrix
5528 .   oj - column indices
5529 -   oa - matrix values
5530 
5531    Output Parameter:
5532 .   mat - the matrix
5533 
5534    Level: advanced
5535 
5536    Notes:
5537        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5538        must free the arrays once the matrix has been destroyed and not before.
5539 
5540        The i and j indices are 0 based
5541 
5542        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5543 
5544        This sets local rows and cannot be used to set off-processor values.
5545 
5546        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5547        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5548        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5549        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5550        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5551        communication if it is known that only local entries will be set.
5552 
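   Example Usage:
       A minimal sketch on two MPI ranks of a 4x4 matrix with two rows per rank. Rank 0 owns global rows 0-1
       with diagonal block [2 1; 0 2] and one off-diagonal entry at global position (0,2); rank 1 passes its
       own arrays analogously. Judging from the implementation below, the "diagonal" column indices j are
       local to the diagonal block while the "off-diagonal" column indices oj are global. The numerical
       values are purely illustrative, and the arrays must remain allocated for the lifetime of the matrix.
.vb
      Mat         A;
      PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1}, oi[] = {0,1,1}, oj[] = {2};
      PetscScalar a[]  = {2.0,1.0,2.0},           oa[] = {0.5};
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
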
5553 .keywords: matrix, aij, compressed row, sparse, parallel
5554 
5555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5556           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5557 @*/
5558 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5559 {
5560   PetscErrorCode ierr;
5561   Mat_MPIAIJ     *maij;
5562 
5563   PetscFunctionBegin;
5564   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5565   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5566   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5567   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5568   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5569   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5570   maij = (Mat_MPIAIJ*) (*mat)->data;
5571 
5572   (*mat)->preallocated = PETSC_TRUE;
5573 
5574   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5575   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5576 
5577   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5578   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5579 
5580   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5581   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5582   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5583   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5584 
5585   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5586   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5587   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5588   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5589   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5590   PetscFunctionReturn(0);
5591 }
5592 
5593 /*
5594     Special version for direct calls from Fortran
5595 */
5596 #include <petsc/private/fortranimpl.h>
5597 
5598 /* Change these macros so that they can be used in a void function */
5599 #undef CHKERRQ
5600 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5601 #undef SETERRQ2
5602 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5603 #undef SETERRQ3
5604 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5605 #undef SETERRQ
5606 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5607 
5608 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5609 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5610 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5611 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5612 #else
5613 #endif
5614 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5615 {
5616   Mat            mat  = *mmat;
5617   PetscInt       m    = *mm, n = *mn;
5618   InsertMode     addv = *maddv;
5619   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5620   PetscScalar    value;
5621   PetscErrorCode ierr;
5622 
5623   MatCheckPreallocated(mat,1);
5624   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5625 
5626 #if defined(PETSC_USE_DEBUG)
5627   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5628 #endif
5629   {
5630     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5631     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5632     PetscBool roworiented = aij->roworiented;
5633 
5634     /* Some Variables required in the macro */
5635     Mat        A                 = aij->A;
5636     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5637     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5638     MatScalar  *aa               = a->a;
5639     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5640     Mat        B                 = aij->B;
5641     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5642     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5643     MatScalar  *ba               = b->a;
5644 
5645     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5646     PetscInt  nonew = a->nonew;
5647     MatScalar *ap1,*ap2;
5648 
5649     PetscFunctionBegin;
5650     for (i=0; i<m; i++) {
5651       if (im[i] < 0) continue;
5652 #if defined(PETSC_USE_DEBUG)
5653       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5654 #endif
5655       if (im[i] >= rstart && im[i] < rend) {
5656         row      = im[i] - rstart;
5657         lastcol1 = -1;
5658         rp1      = aj + ai[row];
5659         ap1      = aa + ai[row];
5660         rmax1    = aimax[row];
5661         nrow1    = ailen[row];
5662         low1     = 0;
5663         high1    = nrow1;
5664         lastcol2 = -1;
5665         rp2      = bj + bi[row];
5666         ap2      = ba + bi[row];
5667         rmax2    = bimax[row];
5668         nrow2    = bilen[row];
5669         low2     = 0;
5670         high2    = nrow2;
5671 
5672         for (j=0; j<n; j++) {
5673           if (roworiented) value = v[i*n+j];
5674           else value = v[i+j*m];
5675           if (in[j] >= cstart && in[j] < cend) {
5676             col = in[j] - cstart;
5677             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5678             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5679           } else if (in[j] < 0) continue;
5680 #if defined(PETSC_USE_DEBUG)
5681           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5682 #endif
5683           else {
5684             if (mat->was_assembled) {
5685               if (!aij->colmap) {
5686                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5687               }
5688 #if defined(PETSC_USE_CTABLE)
5689               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5690               col--;
5691 #else
5692               col = aij->colmap[in[j]] - 1;
5693 #endif
5694               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5695               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5696                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5697                 col  =  in[j];
5698                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5699                 B     = aij->B;
5700                 b     = (Mat_SeqAIJ*)B->data;
5701                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5702                 rp2   = bj + bi[row];
5703                 ap2   = ba + bi[row];
5704                 rmax2 = bimax[row];
5705                 nrow2 = bilen[row];
5706                 low2  = 0;
5707                 high2 = nrow2;
5708                 bm    = aij->B->rmap->n;
5709                 ba    = b->a;
5710               }
5711             } else col = in[j];
5712             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5713           }
5714         }
5715       } else if (!aij->donotstash) {
5716         if (roworiented) {
5717           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5718         } else {
5719           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5720         }
5721       }
5722     }
5723   }
5724   PetscFunctionReturnVoid();
5725 }
5726 
5727