xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 040216a43ff5238a388220632b296a83505ff344)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
22    enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 
135 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
138   PetscErrorCode ierr;
139   PetscInt       i,rstart,nrows,*rows;
140 
141   PetscFunctionBegin;
142   *zrows = NULL;
143   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
144   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
145   for (i=0; i<nrows; i++) rows[i] += rstart;
146   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
154   PetscInt       i,n,*garray = aij->garray;
155   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
156   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
157   PetscReal      *work;
158 
159   PetscFunctionBegin;
160   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
161   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
162   if (type == NORM_2) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
168     }
169   } else if (type == NORM_1) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
175     }
176   } else if (type == NORM_INFINITY) {
177     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
178       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
179     }
180     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
181       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
182     }
183 
184   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
185   if (type == NORM_INFINITY) {
186     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
187   } else {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   }
190   ierr = PetscFree(work);CHKERRQ(ierr);
191   if (type == NORM_2) {
192     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
198 {
199   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
200   IS              sis,gis;
201   PetscErrorCode  ierr;
202   const PetscInt  *isis,*igis;
203   PetscInt        n,*iis,nsis,ngis,rstart,i;
204 
205   PetscFunctionBegin;
206   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
207   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
208   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
209   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
210   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
211   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
212 
213   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
215   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
216   n    = ngis + nsis;
217   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
218   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
219   for (i=0; i<n; i++) iis[i] += rstart;
220   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
221 
222   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
223   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
224   ierr = ISDestroy(&sis);CHKERRQ(ierr);
225   ierr = ISDestroy(&gis);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 /*
230     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
231     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
232 
233     Only for square matrices
234 
235     Used by a preconditioner, hence PETSC_EXTERN
236 */
237 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
238 {
239   PetscMPIInt    rank,size;
240   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
241   PetscErrorCode ierr;
242   Mat            mat;
243   Mat_SeqAIJ     *gmata;
244   PetscMPIInt    tag;
245   MPI_Status     status;
246   PetscBool      aij;
247   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
248 
249   PetscFunctionBegin;
250   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
252   if (!rank) {
253     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
254     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
255   }
256   if (reuse == MAT_INITIAL_MATRIX) {
257     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
258     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
259     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
260     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
261     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
262     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
263     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
264     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
265     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
266 
267     rowners[0] = 0;
268     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
269     rstart = rowners[rank];
270     rend   = rowners[rank+1];
271     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
272     if (!rank) {
273       gmata = (Mat_SeqAIJ*) gmat->data;
274       /* send row lengths to all processors */
275       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
276       for (i=1; i<size; i++) {
277         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
278       }
279       /* determine number diagonal and off-diagonal counts */
280       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
281       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
282       jj   = 0;
283       for (i=0; i<m; i++) {
284         for (j=0; j<dlens[i]; j++) {
285           if (gmata->j[jj] < rstart) ld[i]++;
286           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
287           jj++;
288         }
289       }
290       /* send column indices to other processes */
291       for (i=1; i<size; i++) {
292         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
293         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
295       }
296 
297       /* send numerical values to other processes */
298       for (i=1; i<size; i++) {
299         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
300         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
301       }
302       gmataa = gmata->a;
303       gmataj = gmata->j;
304 
305     } else {
306       /* receive row lengths */
307       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       /* receive column indices */
309       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
311       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       /* determine number diagonal and off-diagonal counts */
313       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
314       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
315       jj   = 0;
316       for (i=0; i<m; i++) {
317         for (j=0; j<dlens[i]; j++) {
318           if (gmataj[jj] < rstart) ld[i]++;
319           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
320           jj++;
321         }
322       }
323       /* receive numerical values */
324       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
325       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
326     }
327     /* set preallocation */
328     for (i=0; i<m; i++) {
329       dlens[i] -= olens[i];
330     }
331     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
332     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
333 
334     for (i=0; i<m; i++) {
335       dlens[i] += olens[i];
336     }
337     cnt = 0;
338     for (i=0; i<m; i++) {
339       row  = rstart + i;
340       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
341       cnt += dlens[i];
342     }
343     if (rank) {
344       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
345     }
346     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
347     ierr = PetscFree(rowners);CHKERRQ(ierr);
348 
349     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
350 
351     *inmat = mat;
352   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
353     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
354     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
355     mat  = *inmat;
356     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
357     if (!rank) {
358       /* send numerical values to other processes */
359       gmata  = (Mat_SeqAIJ*) gmat->data;
360       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
361       gmataa = gmata->a;
362       for (i=1; i<size; i++) {
363         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
364         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
365       }
366       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
367     } else {
368       /* receive numerical values from process 0*/
369       nz   = Ad->nz + Ao->nz;
370       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
371       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
372     }
373     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
374     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
375     ad = Ad->a;
376     ao = Ao->a;
377     if (mat->rmap->n) {
378       i  = 0;
379       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
380       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
381     }
382     for (i=1; i<mat->rmap->n; i++) {
383       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
384       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
385     }
386     i--;
387     if (mat->rmap->n) {
388       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
389     }
390     if (rank) {
391       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
392     }
393   }
394   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
396   PetscFunctionReturn(0);
397 }
398 
399 /*
400   Local utility routine that creates a mapping from the global column
401 number to the local number in the off-diagonal part of the local
402 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
403 a slightly higher hash table cost; without it it is not scalable (each processor
404 has an order N integer array but is fast to acess.
405 */
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
499 {
500   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
501   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
502   PetscErrorCode ierr;
503   PetscInt       l,*garray = mat->garray,diag;
504 
505   PetscFunctionBegin;
506   /* code only works for square matrices A */
507 
508   /* find size of row to the left of the diagonal part */
509   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
510   row  = row - diag;
511   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
512     if (garray[b->j[b->i[row]+l]] > diag) break;
513   }
514   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
515 
516   /* diagonal part */
517   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
518 
519   /* right of diagonal part */
520   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
525 {
526   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
527   PetscScalar    value;
528   PetscErrorCode ierr;
529   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
530   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
531   PetscBool      roworiented = aij->roworiented;
532 
533   /* Some Variables required in the macro */
534   Mat        A                 = aij->A;
535   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
536   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
537   MatScalar  *aa               = a->a;
538   PetscBool  ignorezeroentries = a->ignorezeroentries;
539   Mat        B                 = aij->B;
540   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
541   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
542   MatScalar  *ba               = b->a;
543 
544   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
545   PetscInt  nonew;
546   MatScalar *ap1,*ap2;
547 
548   PetscFunctionBegin;
549   for (i=0; i<m; i++) {
550     if (im[i] < 0) continue;
551 #if defined(PETSC_USE_DEBUG)
552     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
553 #endif
554     if (im[i] >= rstart && im[i] < rend) {
555       row      = im[i] - rstart;
556       lastcol1 = -1;
557       rp1      = aj + ai[row];
558       ap1      = aa + ai[row];
559       rmax1    = aimax[row];
560       nrow1    = ailen[row];
561       low1     = 0;
562       high1    = nrow1;
563       lastcol2 = -1;
564       rp2      = bj + bi[row];
565       ap2      = ba + bi[row];
566       rmax2    = bimax[row];
567       nrow2    = bilen[row];
568       low2     = 0;
569       high2    = nrow2;
570 
571       for (j=0; j<n; j++) {
572         if (roworiented) value = v[i*n+j];
573         else             value = v[i+j*m];
574         if (in[j] >= cstart && in[j] < cend) {
575           col   = in[j] - cstart;
576           nonew = a->nonew;
577           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
578           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
579         } else if (in[j] < 0) continue;
580 #if defined(PETSC_USE_DEBUG)
581         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
582 #endif
583         else {
584           if (mat->was_assembled) {
585             if (!aij->colmap) {
586               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
587             }
588 #if defined(PETSC_USE_CTABLE)
589             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
590             col--;
591 #else
592             col = aij->colmap[in[j]] - 1;
593 #endif
594             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
595               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
596               col  =  in[j];
597               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
598               B     = aij->B;
599               b     = (Mat_SeqAIJ*)B->data;
600               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
601               rp2   = bj + bi[row];
602               ap2   = ba + bi[row];
603               rmax2 = bimax[row];
604               nrow2 = bilen[row];
605               low2  = 0;
606               high2 = nrow2;
607               bm    = aij->B->rmap->n;
608               ba    = b->a;
609             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
610           } else col = in[j];
611           nonew = b->nonew;
612           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
613         }
614       }
615     } else {
616       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
617       if (!aij->donotstash) {
618         mat->assembled = PETSC_FALSE;
619         if (roworiented) {
620           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         } else {
622           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
623         }
624       }
625     }
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
635   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
636 
637   PetscFunctionBegin;
638   for (i=0; i<m; i++) {
639     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
640     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
641     if (idxm[i] >= rstart && idxm[i] < rend) {
642       row = idxm[i] - rstart;
643       for (j=0; j<n; j++) {
644         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
645         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
646         if (idxn[j] >= cstart && idxn[j] < cend) {
647           col  = idxn[j] - cstart;
648           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
649         } else {
650           if (!aij->colmap) {
651             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
652           }
653 #if defined(PETSC_USE_CTABLE)
654           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
655           col--;
656 #else
657           col = aij->colmap[idxn[j]] - 1;
658 #endif
659           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
660           else {
661             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
662           }
663         }
664       }
665     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
666   }
667   PetscFunctionReturn(0);
668 }
669 
670 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
671 
672 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
673 {
674   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
675   PetscErrorCode ierr;
676   PetscInt       nstash,reallocs;
677 
678   PetscFunctionBegin;
679   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
680 
681   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
682   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
683   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
688 {
689   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
690   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
691   PetscErrorCode ierr;
692   PetscMPIInt    n;
693   PetscInt       i,j,rstart,ncols,flg;
694   PetscInt       *row,*col;
695   PetscBool      other_disassembled;
696   PetscScalar    *val;
697 
698   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
699 
700   PetscFunctionBegin;
701   if (!aij->donotstash && !mat->nooffprocentries) {
702     while (1) {
703       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
704       if (!flg) break;
705 
706       for (i=0; i<n; ) {
707         /* Now identify the consecutive vals belonging to the same row */
708         for (j=i,rstart=row[j]; j<n; j++) {
709           if (row[j] != rstart) break;
710         }
711         if (j < n) ncols = j-i;
712         else       ncols = n-i;
713         /* Now assemble all these values with a single function call */
714         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
715 
716         i = j;
717       }
718     }
719     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
720   }
721   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
722   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
723 
724   /* determine if any processor has disassembled, if so we must
725      also disassemble ourselfs, in order that we may reassemble. */
726   /*
727      if nonzero structure of submatrix B cannot change then we know that
728      no processor disassembled thus we can skip this stuff
729   */
730   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
731     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
732     if (mat->was_assembled && !other_disassembled) {
733       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
734     }
735   }
736   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
737     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
738   }
739   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
740   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
741   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
742 
743   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
744 
745   aij->rowvalues = 0;
746 
747   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
748   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
749 
750   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
751   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
752     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
753     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
754   }
755   PetscFunctionReturn(0);
756 }
757 
758 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
759 {
760   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
761   PetscErrorCode ierr;
762 
763   PetscFunctionBegin;
764   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
765   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
766   PetscFunctionReturn(0);
767 }
768 
769 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
770 {
771   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
772   PetscInt      *lrows;
773   PetscInt       r, len;
774   PetscErrorCode ierr;
775 
776   PetscFunctionBegin;
777   /* get locally owned rows */
778   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
779   /* fix right hand side if needed */
780   if (x && b) {
781     const PetscScalar *xx;
782     PetscScalar       *bb;
783 
784     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
785     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
786     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
787     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
789   }
790   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
791   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
792   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
793     PetscBool cong;
794     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
795     if (cong) A->congruentlayouts = 1;
796     else      A->congruentlayouts = 0;
797   }
798   if ((diag != 0.0) && A->congruentlayouts) {
799     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
800   } else if (diag != 0.0) {
801     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
802     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
803     for (r = 0; r < len; ++r) {
804       const PetscInt row = lrows[r] + A->rmap->rstart;
805       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
806     }
807     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809   } else {
810     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
811   }
812   ierr = PetscFree(lrows);CHKERRQ(ierr);
813 
814   /* only change matrix nonzero state if pattern was allowed to be changed */
815   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
816     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
817     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
823 {
824   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
825   PetscErrorCode    ierr;
826   PetscMPIInt       n = A->rmap->n;
827   PetscInt          i,j,r,m,p = 0,len = 0;
828   PetscInt          *lrows,*owners = A->rmap->range;
829   PetscSFNode       *rrows;
830   PetscSF           sf;
831   const PetscScalar *xx;
832   PetscScalar       *bb,*mask;
833   Vec               xmask,lmask;
834   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
835   const PetscInt    *aj, *ii,*ridx;
836   PetscScalar       *aa;
837 
838   PetscFunctionBegin;
839   /* Create SF where leaves are input rows and roots are owned rows */
840   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
841   for (r = 0; r < n; ++r) lrows[r] = -1;
842   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
843   for (r = 0; r < N; ++r) {
844     const PetscInt idx   = rows[r];
845     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
846     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
847       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
848     }
849     rrows[r].rank  = p;
850     rrows[r].index = rows[r] - owners[p];
851   }
852   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
853   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
854   /* Collect flags for rows to be zeroed */
855   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
857   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
858   /* Compress and put in row numbers */
859   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
860   /* zero diagonal part of matrix */
861   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
862   /* handle off diagonal part of matrix */
863   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
864   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
865   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
866   for (i=0; i<len; i++) bb[lrows[i]] = 1;
867   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
868   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
871   if (x) {
872     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
875     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
876   }
877   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
878   /* remove zeroed rows of off diagonal matrix */
879   ii = aij->i;
880   for (i=0; i<len; i++) {
881     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
882   }
883   /* loop over all elements of off process part of matrix zeroing removed columns*/
884   if (aij->compressedrow.use) {
885     m    = aij->compressedrow.nrows;
886     ii   = aij->compressedrow.i;
887     ridx = aij->compressedrow.rindex;
888     for (i=0; i<m; i++) {
889       n  = ii[i+1] - ii[i];
890       aj = aij->j + ii[i];
891       aa = aij->a + ii[i];
892 
893       for (j=0; j<n; j++) {
894         if (PetscAbsScalar(mask[*aj])) {
895           if (b) bb[*ridx] -= *aa*xx[*aj];
896           *aa = 0.0;
897         }
898         aa++;
899         aj++;
900       }
901       ridx++;
902     }
903   } else { /* do not use compressed row format */
904     m = l->B->rmap->n;
905     for (i=0; i<m; i++) {
906       n  = ii[i+1] - ii[i];
907       aj = aij->j + ii[i];
908       aa = aij->a + ii[i];
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[i] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917     }
918   }
919   if (x) {
920     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
921     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922   }
923   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
924   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
925   ierr = PetscFree(lrows);CHKERRQ(ierr);
926 
927   /* only change matrix nonzero state if pattern was allowed to be changed */
928   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
929     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
930     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
936 {
937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
938   PetscErrorCode ierr;
939   PetscInt       nt;
940 
941   PetscFunctionBegin;
942   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
943   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
944   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
946   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
962 {
963   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
964   PetscErrorCode ierr;
965 
966   PetscFunctionBegin;
967   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
969   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
975 {
976   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
977   PetscErrorCode ierr;
978   PetscBool      merged;
979 
980   PetscFunctionBegin;
981   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
982   /* do nondiagonal part */
983   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
984   if (!merged) {
985     /* send it on its way */
986     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
987     /* do local part */
988     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
989     /* receive remote parts: note this assumes the values are not actually */
990     /* added in yy until the next line, */
991     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
992   } else {
993     /* do local part */
994     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
995     /* send it on its way */
996     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997     /* values actually were received in the Begin() but we need to call this nop */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1004 {
1005   MPI_Comm       comm;
1006   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1007   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1008   IS             Me,Notme;
1009   PetscErrorCode ierr;
1010   PetscInt       M,N,first,last,*notme,i;
1011   PetscMPIInt    size;
1012 
1013   PetscFunctionBegin;
1014   /* Easy test: symmetric diagonal block */
1015   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1016   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1017   if (!*f) PetscFunctionReturn(0);
1018   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1020   if (size == 1) PetscFunctionReturn(0);
1021 
1022   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1023   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1024   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1025   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1026   for (i=0; i<first; i++) notme[i] = i;
1027   for (i=last; i<M; i++) notme[i-last+first] = i;
1028   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1029   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1030   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1031   Aoff = Aoffs[0];
1032   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1033   Boff = Boffs[0];
1034   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1035   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1036   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1037   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1038   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1039   ierr = PetscFree(notme);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1044 {
1045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1046   PetscErrorCode ierr;
1047 
1048   PetscFunctionBegin;
1049   /* do nondiagonal part */
1050   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1051   /* send it on its way */
1052   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1053   /* do local part */
1054   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1055   /* receive remote parts */
1056   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 /*
1061   This only works correctly for square matrices where the subblock A->A is the
1062    diagonal block
1063 */
1064 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1065 {
1066   PetscErrorCode ierr;
1067   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1068 
1069   PetscFunctionBegin;
1070   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1071   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1072   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1077 {
1078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1083   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1088 {
1089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1090   PetscErrorCode ierr;
1091 
1092   PetscFunctionBegin;
1093 #if defined(PETSC_USE_LOG)
1094   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1095 #endif
1096   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1097   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1098   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1099   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1100 #if defined(PETSC_USE_CTABLE)
1101   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1102 #else
1103   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1104 #endif
1105   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1106   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1107   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1108   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1109   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1110   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1111 
1112   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1118   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1120 #if defined(PETSC_HAVE_ELEMENTAL)
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1122 #endif
1123 #if defined(PETSC_HAVE_HYPRE)
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1126 #endif
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1131 {
1132   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1133   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1134   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1135   PetscErrorCode ierr;
1136   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1137   int            fd;
1138   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1139   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1140   PetscScalar    *column_values;
1141   PetscInt       message_count,flowcontrolcount;
1142   FILE           *file;
1143 
1144   PetscFunctionBegin;
1145   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1146   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1147   nz   = A->nz + B->nz;
1148   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1149   if (!rank) {
1150     header[0] = MAT_FILE_CLASSID;
1151     header[1] = mat->rmap->N;
1152     header[2] = mat->cmap->N;
1153 
1154     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1155     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1156     /* get largest number of rows any processor has */
1157     rlen  = mat->rmap->n;
1158     range = mat->rmap->range;
1159     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1160   } else {
1161     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1162     rlen = mat->rmap->n;
1163   }
1164 
1165   /* load up the local row counts */
1166   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1167   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1168 
1169   /* store the row lengths to the file */
1170   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1171   if (!rank) {
1172     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     for (i=1; i<size; i++) {
1174       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1175       rlen = range[i+1] - range[i];
1176       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     }
1179     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1180   } else {
1181     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1182     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1184   }
1185   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1186 
1187   /* load up the local column indices */
1188   nzmax = nz; /* th processor needs space a largest processor needs */
1189   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1191   cnt   = 0;
1192   for (i=0; i<mat->rmap->n; i++) {
1193     for (j=B->i[i]; j<B->i[i+1]; j++) {
1194       if ((col = garray[B->j[j]]) > cstart) break;
1195       column_indices[cnt++] = col;
1196     }
1197     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1198     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1199   }
1200   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1201 
1202   /* store the column indices to the file */
1203   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1204   if (!rank) {
1205     MPI_Status status;
1206     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1207     for (i=1; i<size; i++) {
1208       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1209       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1210       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1211       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     }
1214     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1215   } else {
1216     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1217     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1220   }
1221   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1222 
1223   /* load up the local column values */
1224   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1225   cnt  = 0;
1226   for (i=0; i<mat->rmap->n; i++) {
1227     for (j=B->i[i]; j<B->i[i+1]; j++) {
1228       if (garray[B->j[j]] > cstart) break;
1229       column_values[cnt++] = B->a[j];
1230     }
1231     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1232     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1233   }
1234   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1235 
1236   /* store the column values to the file */
1237   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1238   if (!rank) {
1239     MPI_Status status;
1240     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1241     for (i=1; i<size; i++) {
1242       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1243       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1244       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1245       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1251     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1254   }
1255   ierr = PetscFree(column_values);CHKERRQ(ierr);
1256 
1257   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1258   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #include <petscdraw.h>
1263 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1264 {
1265   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1266   PetscErrorCode    ierr;
1267   PetscMPIInt       rank = aij->rank,size = aij->size;
1268   PetscBool         isdraw,iascii,isbinary;
1269   PetscViewer       sviewer;
1270   PetscViewerFormat format;
1271 
1272   PetscFunctionBegin;
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1274   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1275   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1276   if (iascii) {
1277     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1278     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1279       MatInfo   info;
1280       PetscBool inodes;
1281 
1282       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1283       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1284       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1285       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1286       if (!inodes) {
1287         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1288                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1289       } else {
1290         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1291                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1292       }
1293       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1295       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1296       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1297       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1298       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1299       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1300       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1301       PetscFunctionReturn(0);
1302     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1303       PetscInt inodecount,inodelimit,*inodes;
1304       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1305       if (inodes) {
1306         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1307       } else {
1308         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1309       }
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1312       PetscFunctionReturn(0);
1313     }
1314   } else if (isbinary) {
1315     if (size == 1) {
1316       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1317       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1318     } else {
1319       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1320     }
1321     PetscFunctionReturn(0);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1326     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1327     if (isnull) PetscFunctionReturn(0);
1328   }
1329 
1330   {
1331     /* assemble the entire matrix onto first processor. */
1332     Mat        A;
1333     Mat_SeqAIJ *Aloc;
1334     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1335     MatScalar  *a;
1336 
1337     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1338     if (!rank) {
1339       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1340     } else {
1341       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1342     }
1343     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1344     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1345     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1346     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1347     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1348 
1349     /* copy over the A part */
1350     Aloc = (Mat_SeqAIJ*)aij->A->data;
1351     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1352     row  = mat->rmap->rstart;
1353     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1354     for (i=0; i<m; i++) {
1355       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1356       row++;
1357       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1358     }
1359     aj = Aloc->j;
1360     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1361 
1362     /* copy over the B part */
1363     Aloc = (Mat_SeqAIJ*)aij->B->data;
1364     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1365     row  = mat->rmap->rstart;
1366     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1367     ct   = cols;
1368     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1369     for (i=0; i<m; i++) {
1370       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1371       row++;
1372       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1373     }
1374     ierr = PetscFree(ct);CHKERRQ(ierr);
1375     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377     /*
1378        Everyone has to call to draw the matrix since the graphics waits are
1379        synchronized across all processors that share the PetscDraw object
1380     */
1381     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1382     if (!rank) {
1383       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1384       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1385     }
1386     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1387     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1388     ierr = MatDestroy(&A);CHKERRQ(ierr);
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1394 {
1395   PetscErrorCode ierr;
1396   PetscBool      iascii,isdraw,issocket,isbinary;
1397 
1398   PetscFunctionBegin;
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1403   if (iascii || isdraw || isbinary || issocket) {
1404     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1410 {
1411   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1412   PetscErrorCode ierr;
1413   Vec            bb1 = 0;
1414   PetscBool      hasop;
1415 
1416   PetscFunctionBegin;
1417   if (flag == SOR_APPLY_UPPER) {
1418     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1419     PetscFunctionReturn(0);
1420   }
1421 
1422   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1423     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1424   }
1425 
1426   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1427     if (flag & SOR_ZERO_INITIAL_GUESS) {
1428       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1429       its--;
1430     }
1431 
1432     while (its--) {
1433       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1434       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1435 
1436       /* update rhs: bb1 = bb - B*x */
1437       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1438       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1439 
1440       /* local sweep */
1441       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1442     }
1443   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1444     if (flag & SOR_ZERO_INITIAL_GUESS) {
1445       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1446       its--;
1447     }
1448     while (its--) {
1449       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1451 
1452       /* update rhs: bb1 = bb - B*x */
1453       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1454       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1455 
1456       /* local sweep */
1457       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1458     }
1459   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1460     if (flag & SOR_ZERO_INITIAL_GUESS) {
1461       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1462       its--;
1463     }
1464     while (its--) {
1465       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1470       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1471 
1472       /* local sweep */
1473       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1474     }
1475   } else if (flag & SOR_EISENSTAT) {
1476     Vec xx1;
1477 
1478     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1479     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1480 
1481     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483     if (!mat->diag) {
1484       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1485       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1486     }
1487     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1488     if (hasop) {
1489       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1490     } else {
1491       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1492     }
1493     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1494 
1495     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1496 
1497     /* local sweep */
1498     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1499     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1500     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1501   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1502 
1503   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1504 
1505   matin->factorerrortype = mat->A->factorerrortype;
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1510 {
1511   Mat            aA,aB,Aperm;
1512   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1513   PetscScalar    *aa,*ba;
1514   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1515   PetscSF        rowsf,sf;
1516   IS             parcolp = NULL;
1517   PetscBool      done;
1518   PetscErrorCode ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1522   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1523   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1524   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1525 
1526   /* Invert row permutation to find out where my rows should go */
1527   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1528   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1529   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1530   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1531   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1532   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1533 
1534   /* Invert column permutation to find out where my columns should go */
1535   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1536   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1537   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1538   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1539   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1540   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1541   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1542 
1543   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1544   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1545   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1546 
1547   /* Find out where my gcols should go */
1548   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1549   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1550   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1551   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1552   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1553   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1554   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1555   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1556 
1557   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1558   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1559   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1560   for (i=0; i<m; i++) {
1561     PetscInt row = rdest[i],rowner;
1562     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1563     for (j=ai[i]; j<ai[i+1]; j++) {
1564       PetscInt cowner,col = cdest[aj[j]];
1565       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1566       if (rowner == cowner) dnnz[i]++;
1567       else onnz[i]++;
1568     }
1569     for (j=bi[i]; j<bi[i+1]; j++) {
1570       PetscInt cowner,col = gcdest[bj[j]];
1571       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1572       if (rowner == cowner) dnnz[i]++;
1573       else onnz[i]++;
1574     }
1575   }
1576   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1578   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1579   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1580   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1581 
1582   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1583   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1584   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1585   for (i=0; i<m; i++) {
1586     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1587     PetscInt j0,rowlen;
1588     rowlen = ai[i+1] - ai[i];
1589     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1590       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1591       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1592     }
1593     rowlen = bi[i+1] - bi[i];
1594     for (j0=j=0; j<rowlen; j0=j) {
1595       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1596       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1597     }
1598   }
1599   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1600   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1602   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1603   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1604   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1605   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1606   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1607   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1608   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1609   *B = Aperm;
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1614 {
1615   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1616   PetscErrorCode ierr;
1617 
1618   PetscFunctionBegin;
1619   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1620   if (ghosts) *ghosts = aij->garray;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1625 {
1626   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1627   Mat            A    = mat->A,B = mat->B;
1628   PetscErrorCode ierr;
1629   PetscReal      isend[5],irecv[5];
1630 
1631   PetscFunctionBegin;
1632   info->block_size = 1.0;
1633   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1634 
1635   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1636   isend[3] = info->memory;  isend[4] = info->mallocs;
1637 
1638   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1639 
1640   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1641   isend[3] += info->memory;  isend[4] += info->mallocs;
1642   if (flag == MAT_LOCAL) {
1643     info->nz_used      = isend[0];
1644     info->nz_allocated = isend[1];
1645     info->nz_unneeded  = isend[2];
1646     info->memory       = isend[3];
1647     info->mallocs      = isend[4];
1648   } else if (flag == MAT_GLOBAL_MAX) {
1649     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   } else if (flag == MAT_GLOBAL_SUM) {
1657     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   }
1665   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1666   info->fill_ratio_needed = 0;
1667   info->factor_mallocs    = 0;
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1672 {
1673   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   switch (op) {
1678   case MAT_NEW_NONZERO_LOCATIONS:
1679   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1680   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1681   case MAT_KEEP_NONZERO_PATTERN:
1682   case MAT_NEW_NONZERO_LOCATION_ERR:
1683   case MAT_USE_INODES:
1684   case MAT_IGNORE_ZERO_ENTRIES:
1685     MatCheckPreallocated(A,1);
1686     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1687     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1688     break;
1689   case MAT_ROW_ORIENTED:
1690     MatCheckPreallocated(A,1);
1691     a->roworiented = flg;
1692 
1693     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1694     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1695     break;
1696   case MAT_NEW_DIAGONALS:
1697     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1698     break;
1699   case MAT_IGNORE_OFF_PROC_ENTRIES:
1700     a->donotstash = flg;
1701     break;
1702   case MAT_SPD:
1703     A->spd_set = PETSC_TRUE;
1704     A->spd     = flg;
1705     if (flg) {
1706       A->symmetric                  = PETSC_TRUE;
1707       A->structurally_symmetric     = PETSC_TRUE;
1708       A->symmetric_set              = PETSC_TRUE;
1709       A->structurally_symmetric_set = PETSC_TRUE;
1710     }
1711     break;
1712   case MAT_SYMMETRIC:
1713     MatCheckPreallocated(A,1);
1714     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_STRUCTURALLY_SYMMETRIC:
1717     MatCheckPreallocated(A,1);
1718     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1719     break;
1720   case MAT_HERMITIAN:
1721     MatCheckPreallocated(A,1);
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_SYMMETRY_ETERNAL:
1725     MatCheckPreallocated(A,1);
1726     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1727     break;
1728   case MAT_SUBMAT_SINGLEIS:
1729     A->submat_singleis = flg;
1730     break;
1731   default:
1732     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1733   }
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1738 {
1739   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1740   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1741   PetscErrorCode ierr;
1742   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1743   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1744   PetscInt       *cmap,*idx_p;
1745 
1746   PetscFunctionBegin;
1747   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1748   mat->getrowactive = PETSC_TRUE;
1749 
1750   if (!mat->rowvalues && (idx || v)) {
1751     /*
1752         allocate enough space to hold information from the longest row.
1753     */
1754     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1755     PetscInt   max = 1,tmp;
1756     for (i=0; i<matin->rmap->n; i++) {
1757       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1758       if (max < tmp) max = tmp;
1759     }
1760     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1761   }
1762 
1763   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1764   lrow = row - rstart;
1765 
1766   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1767   if (!v)   {pvA = 0; pvB = 0;}
1768   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1769   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1770   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1771   nztot = nzA + nzB;
1772 
1773   cmap = mat->garray;
1774   if (v  || idx) {
1775     if (nztot) {
1776       /* Sort by increasing column numbers, assuming A and B already sorted */
1777       PetscInt imark = -1;
1778       if (v) {
1779         *v = v_p = mat->rowvalues;
1780         for (i=0; i<nzB; i++) {
1781           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1782           else break;
1783         }
1784         imark = i;
1785         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1786         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1787       }
1788       if (idx) {
1789         *idx = idx_p = mat->rowindices;
1790         if (imark > -1) {
1791           for (i=0; i<imark; i++) {
1792             idx_p[i] = cmap[cworkB[i]];
1793           }
1794         } else {
1795           for (i=0; i<nzB; i++) {
1796             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1797             else break;
1798           }
1799           imark = i;
1800         }
1801         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1802         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1803       }
1804     } else {
1805       if (idx) *idx = 0;
1806       if (v)   *v   = 0;
1807     }
1808   }
1809   *nz  = nztot;
1810   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1811   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1816 {
1817   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1818 
1819   PetscFunctionBegin;
1820   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1821   aij->getrowactive = PETSC_FALSE;
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1826 {
1827   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1828   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1829   PetscErrorCode ierr;
1830   PetscInt       i,j,cstart = mat->cmap->rstart;
1831   PetscReal      sum = 0.0;
1832   MatScalar      *v;
1833 
1834   PetscFunctionBegin;
1835   if (aij->size == 1) {
1836     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1837   } else {
1838     if (type == NORM_FROBENIUS) {
1839       v = amat->a;
1840       for (i=0; i<amat->nz; i++) {
1841         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1842       }
1843       v = bmat->a;
1844       for (i=0; i<bmat->nz; i++) {
1845         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1846       }
1847       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1848       *norm = PetscSqrtReal(*norm);
1849       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1850     } else if (type == NORM_1) { /* max column norm */
1851       PetscReal *tmp,*tmp2;
1852       PetscInt  *jj,*garray = aij->garray;
1853       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1854       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1855       *norm = 0.0;
1856       v     = amat->a; jj = amat->j;
1857       for (j=0; j<amat->nz; j++) {
1858         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1859       }
1860       v = bmat->a; jj = bmat->j;
1861       for (j=0; j<bmat->nz; j++) {
1862         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1863       }
1864       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1865       for (j=0; j<mat->cmap->N; j++) {
1866         if (tmp2[j] > *norm) *norm = tmp2[j];
1867       }
1868       ierr = PetscFree(tmp);CHKERRQ(ierr);
1869       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1870       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1871     } else if (type == NORM_INFINITY) { /* max row norm */
1872       PetscReal ntemp = 0.0;
1873       for (j=0; j<aij->A->rmap->n; j++) {
1874         v   = amat->a + amat->i[j];
1875         sum = 0.0;
1876         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1877           sum += PetscAbsScalar(*v); v++;
1878         }
1879         v = bmat->a + bmat->i[j];
1880         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v); v++;
1882         }
1883         if (sum > ntemp) ntemp = sum;
1884       }
1885       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1886       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1887     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1893 {
1894   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1895   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1896   PetscErrorCode ierr;
1897   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1898   PetscInt       cstart = A->cmap->rstart,ncol;
1899   Mat            B;
1900   MatScalar      *array;
1901 
1902   PetscFunctionBegin;
1903   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1904 
1905   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1906   ai = Aloc->i; aj = Aloc->j;
1907   bi = Bloc->i; bj = Bloc->j;
1908   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1909     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1910     PetscSFNode          *oloc;
1911     PETSC_UNUSED PetscSF sf;
1912 
1913     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1914     /* compute d_nnz for preallocation */
1915     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1916     for (i=0; i<ai[ma]; i++) {
1917       d_nnz[aj[i]]++;
1918       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1919     }
1920     /* compute local off-diagonal contributions */
1921     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1922     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1923     /* map those to global */
1924     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1925     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1926     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1927     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1928     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1929     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1930     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1931 
1932     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1933     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1934     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1935     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1936     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1937     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1938   } else {
1939     B    = *matout;
1940     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1941     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1942   }
1943 
1944   /* copy over the A part */
1945   array = Aloc->a;
1946   row   = A->rmap->rstart;
1947   for (i=0; i<ma; i++) {
1948     ncol = ai[i+1]-ai[i];
1949     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1950     row++;
1951     array += ncol; aj += ncol;
1952   }
1953   aj = Aloc->j;
1954   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1955 
1956   /* copy over the B part */
1957   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1958   array = Bloc->a;
1959   row   = A->rmap->rstart;
1960   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1961   cols_tmp = cols;
1962   for (i=0; i<mb; i++) {
1963     ncol = bi[i+1]-bi[i];
1964     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1965     row++;
1966     array += ncol; cols_tmp += ncol;
1967   }
1968   ierr = PetscFree(cols);CHKERRQ(ierr);
1969 
1970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1973     *matout = B;
1974   } else {
1975     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1976   }
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1981 {
1982   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1983   Mat            a    = aij->A,b = aij->B;
1984   PetscErrorCode ierr;
1985   PetscInt       s1,s2,s3;
1986 
1987   PetscFunctionBegin;
1988   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1989   if (rr) {
1990     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1991     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1992     /* Overlap communication with computation. */
1993     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1994   }
1995   if (ll) {
1996     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1997     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1998     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1999   }
2000   /* scale  the diagonal block */
2001   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2002 
2003   if (rr) {
2004     /* Do a scatter end and then right scale the off-diagonal block */
2005     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2006     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2007   }
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2012 {
2013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2014   PetscErrorCode ierr;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2022 {
2023   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2024   Mat            a,b,c,d;
2025   PetscBool      flg;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   a = matA->A; b = matA->B;
2030   c = matB->A; d = matB->B;
2031 
2032   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2033   if (flg) {
2034     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2035   }
2036   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2041 {
2042   PetscErrorCode ierr;
2043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2044   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2045 
2046   PetscFunctionBegin;
2047   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2048   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2049     /* because of the column compression in the off-processor part of the matrix a->B,
2050        the number of columns in a->B and b->B may be different, hence we cannot call
2051        the MatCopy() directly on the two parts. If need be, we can provide a more
2052        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2053        then copying the submatrices */
2054     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2055   } else {
2056     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2057     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2058   }
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2063 {
2064   PetscErrorCode ierr;
2065 
2066   PetscFunctionBegin;
2067   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 /*
2072    Computes the number of nonzeros per row needed for preallocation when X and Y
2073    have different nonzero structure.
2074 */
2075 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2076 {
2077   PetscInt       i,j,k,nzx,nzy;
2078 
2079   PetscFunctionBegin;
2080   /* Set the number of nonzeros in the new matrix */
2081   for (i=0; i<m; i++) {
2082     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2083     nzx = xi[i+1] - xi[i];
2084     nzy = yi[i+1] - yi[i];
2085     nnz[i] = 0;
2086     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2087       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2088       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2089       nnz[i]++;
2090     }
2091     for (; k<nzy; k++) nnz[i]++;
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2097 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2098 {
2099   PetscErrorCode ierr;
2100   PetscInt       m = Y->rmap->N;
2101   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2102   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2103 
2104   PetscFunctionBegin;
2105   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2110 {
2111   PetscErrorCode ierr;
2112   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2113   PetscBLASInt   bnz,one=1;
2114   Mat_SeqAIJ     *x,*y;
2115 
2116   PetscFunctionBegin;
2117   if (str == SAME_NONZERO_PATTERN) {
2118     PetscScalar alpha = a;
2119     x    = (Mat_SeqAIJ*)xx->A->data;
2120     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2121     y    = (Mat_SeqAIJ*)yy->A->data;
2122     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2123     x    = (Mat_SeqAIJ*)xx->B->data;
2124     y    = (Mat_SeqAIJ*)yy->B->data;
2125     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2126     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2127     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2128   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2129     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2130   } else {
2131     Mat      B;
2132     PetscInt *nnz_d,*nnz_o;
2133     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2134     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2135     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2136     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2137     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2138     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2139     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2140     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2141     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2142     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2143     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2144     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2145     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2146     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2147   }
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2152 
2153 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2154 {
2155 #if defined(PETSC_USE_COMPLEX)
2156   PetscErrorCode ierr;
2157   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2158 
2159   PetscFunctionBegin;
2160   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2161   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2162 #else
2163   PetscFunctionBegin;
2164 #endif
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2169 {
2170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2175   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2180 {
2181   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2182   PetscErrorCode ierr;
2183 
2184   PetscFunctionBegin;
2185   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2186   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2191 {
2192   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2193   PetscErrorCode ierr;
2194   PetscInt       i,*idxb = 0;
2195   PetscScalar    *va,*vb;
2196   Vec            vtmp;
2197 
2198   PetscFunctionBegin;
2199   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2200   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2201   if (idx) {
2202     for (i=0; i<A->rmap->n; i++) {
2203       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2204     }
2205   }
2206 
2207   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2208   if (idx) {
2209     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2210   }
2211   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2212   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2213 
2214   for (i=0; i<A->rmap->n; i++) {
2215     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2216       va[i] = vb[i];
2217       if (idx) idx[i] = a->garray[idxb[i]];
2218     }
2219   }
2220 
2221   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2222   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2223   ierr = PetscFree(idxb);CHKERRQ(ierr);
2224   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2229 {
2230   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2231   PetscErrorCode ierr;
2232   PetscInt       i,*idxb = 0;
2233   PetscScalar    *va,*vb;
2234   Vec            vtmp;
2235 
2236   PetscFunctionBegin;
2237   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2238   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2239   if (idx) {
2240     for (i=0; i<A->cmap->n; i++) {
2241       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2242     }
2243   }
2244 
2245   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2246   if (idx) {
2247     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2248   }
2249   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2250   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2251 
2252   for (i=0; i<A->rmap->n; i++) {
2253     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2254       va[i] = vb[i];
2255       if (idx) idx[i] = a->garray[idxb[i]];
2256     }
2257   }
2258 
2259   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2260   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2261   ierr = PetscFree(idxb);CHKERRQ(ierr);
2262   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2267 {
2268   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2269   PetscInt       n      = A->rmap->n;
2270   PetscInt       cstart = A->cmap->rstart;
2271   PetscInt       *cmap  = mat->garray;
2272   PetscInt       *diagIdx, *offdiagIdx;
2273   Vec            diagV, offdiagV;
2274   PetscScalar    *a, *diagA, *offdiagA;
2275   PetscInt       r;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2280   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2281   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2282   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2283   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2284   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2285   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2286   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2287   for (r = 0; r < n; ++r) {
2288     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2289       a[r]   = diagA[r];
2290       idx[r] = cstart + diagIdx[r];
2291     } else {
2292       a[r]   = offdiagA[r];
2293       idx[r] = cmap[offdiagIdx[r]];
2294     }
2295   }
2296   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2299   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2300   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2301   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2306 {
2307   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2308   PetscInt       n      = A->rmap->n;
2309   PetscInt       cstart = A->cmap->rstart;
2310   PetscInt       *cmap  = mat->garray;
2311   PetscInt       *diagIdx, *offdiagIdx;
2312   Vec            diagV, offdiagV;
2313   PetscScalar    *a, *diagA, *offdiagA;
2314   PetscInt       r;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2319   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2320   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2321   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2322   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2323   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2324   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2325   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2326   for (r = 0; r < n; ++r) {
2327     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2328       a[r]   = diagA[r];
2329       idx[r] = cstart + diagIdx[r];
2330     } else {
2331       a[r]   = offdiagA[r];
2332       idx[r] = cmap[offdiagIdx[r]];
2333     }
2334   }
2335   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2337   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2338   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2339   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2340   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2345 {
2346   PetscErrorCode ierr;
2347   Mat            *dummy;
2348 
2349   PetscFunctionBegin;
2350   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2351   *newmat = *dummy;
2352   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2357 {
2358   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2359   PetscErrorCode ierr;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2363   A->factorerrortype = a->A->factorerrortype;
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2368 {
2369   PetscErrorCode ierr;
2370   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2374   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2375   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2376   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2381 {
2382   PetscFunctionBegin;
2383   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2384   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 /*@
2389    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2390 
2391    Collective on Mat
2392 
2393    Input Parameters:
2394 +    A - the matrix
2395 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2396 
2397  Level: advanced
2398 
2399 @*/
2400 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2401 {
2402   PetscErrorCode       ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2410 {
2411   PetscErrorCode       ierr;
2412   PetscBool            sc = PETSC_FALSE,flg;
2413 
2414   PetscFunctionBegin;
2415   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2416   ierr = PetscObjectOptionsBegin((PetscObject)A);
2417     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2418     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2419     if (flg) {
2420       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2421     }
2422   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2427 {
2428   PetscErrorCode ierr;
2429   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2430   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2431 
2432   PetscFunctionBegin;
2433   if (!Y->preallocated) {
2434     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2435   } else if (!aij->nz) {
2436     PetscInt nonew = aij->nonew;
2437     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2438     aij->nonew = nonew;
2439   }
2440   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2445 {
2446   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2447   PetscErrorCode ierr;
2448 
2449   PetscFunctionBegin;
2450   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2451   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2452   if (d) {
2453     PetscInt rstart;
2454     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2455     *d += rstart;
2456 
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 
2462 /* -------------------------------------------------------------------*/
2463 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2464                                        MatGetRow_MPIAIJ,
2465                                        MatRestoreRow_MPIAIJ,
2466                                        MatMult_MPIAIJ,
2467                                 /* 4*/ MatMultAdd_MPIAIJ,
2468                                        MatMultTranspose_MPIAIJ,
2469                                        MatMultTransposeAdd_MPIAIJ,
2470                                        0,
2471                                        0,
2472                                        0,
2473                                 /*10*/ 0,
2474                                        0,
2475                                        0,
2476                                        MatSOR_MPIAIJ,
2477                                        MatTranspose_MPIAIJ,
2478                                 /*15*/ MatGetInfo_MPIAIJ,
2479                                        MatEqual_MPIAIJ,
2480                                        MatGetDiagonal_MPIAIJ,
2481                                        MatDiagonalScale_MPIAIJ,
2482                                        MatNorm_MPIAIJ,
2483                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2484                                        MatAssemblyEnd_MPIAIJ,
2485                                        MatSetOption_MPIAIJ,
2486                                        MatZeroEntries_MPIAIJ,
2487                                 /*24*/ MatZeroRows_MPIAIJ,
2488                                        0,
2489                                        0,
2490                                        0,
2491                                        0,
2492                                 /*29*/ MatSetUp_MPIAIJ,
2493                                        0,
2494                                        0,
2495                                        MatGetDiagonalBlock_MPIAIJ,
2496                                        0,
2497                                 /*34*/ MatDuplicate_MPIAIJ,
2498                                        0,
2499                                        0,
2500                                        0,
2501                                        0,
2502                                 /*39*/ MatAXPY_MPIAIJ,
2503                                        MatCreateSubMatrices_MPIAIJ,
2504                                        MatIncreaseOverlap_MPIAIJ,
2505                                        MatGetValues_MPIAIJ,
2506                                        MatCopy_MPIAIJ,
2507                                 /*44*/ MatGetRowMax_MPIAIJ,
2508                                        MatScale_MPIAIJ,
2509                                        MatShift_MPIAIJ,
2510                                        MatDiagonalSet_MPIAIJ,
2511                                        MatZeroRowsColumns_MPIAIJ,
2512                                 /*49*/ MatSetRandom_MPIAIJ,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                        0,
2517                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2518                                        0,
2519                                        MatSetUnfactored_MPIAIJ,
2520                                        MatPermute_MPIAIJ,
2521                                        0,
2522                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2523                                        MatDestroy_MPIAIJ,
2524                                        MatView_MPIAIJ,
2525                                        0,
2526                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2527                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2528                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2529                                        0,
2530                                        0,
2531                                        0,
2532                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2533                                        MatGetRowMinAbs_MPIAIJ,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                 /*75*/ MatFDColoringApply_AIJ,
2539                                        MatSetFromOptions_MPIAIJ,
2540                                        0,
2541                                        0,
2542                                        MatFindZeroDiagonals_MPIAIJ,
2543                                 /*80*/ 0,
2544                                        0,
2545                                        0,
2546                                 /*83*/ MatLoad_MPIAIJ,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2553                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2554                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2555                                        MatPtAP_MPIAIJ_MPIAIJ,
2556                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2557                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2558                                        0,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                 /*99*/ 0,
2563                                        0,
2564                                        0,
2565                                        MatConjugate_MPIAIJ,
2566                                        0,
2567                                 /*104*/MatSetValuesRow_MPIAIJ,
2568                                        MatRealPart_MPIAIJ,
2569                                        MatImaginaryPart_MPIAIJ,
2570                                        0,
2571                                        0,
2572                                 /*109*/0,
2573                                        0,
2574                                        MatGetRowMin_MPIAIJ,
2575                                        0,
2576                                        MatMissingDiagonal_MPIAIJ,
2577                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2578                                        0,
2579                                        MatGetGhosts_MPIAIJ,
2580                                        0,
2581                                        0,
2582                                 /*119*/0,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                        MatGetMultiProcBlock_MPIAIJ,
2587                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2588                                        MatGetColumnNorms_MPIAIJ,
2589                                        MatInvertBlockDiagonal_MPIAIJ,
2590                                        0,
2591                                        MatCreateSubMatricesMPI_MPIAIJ,
2592                                 /*129*/0,
2593                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2594                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2595                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2596                                        0,
2597                                 /*134*/0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*139*/MatSetBlockSizes_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                        MatFDColoringSetUp_MPIXAIJ,
2606                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2607                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2608 };
2609 
2610 /* ----------------------------------------------------------------------------------------*/
2611 
2612 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2613 {
2614   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2615   PetscErrorCode ierr;
2616 
2617   PetscFunctionBegin;
2618   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2619   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2624 {
2625   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2626   PetscErrorCode ierr;
2627 
2628   PetscFunctionBegin;
2629   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2630   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2635 {
2636   Mat_MPIAIJ     *b;
2637   PetscErrorCode ierr;
2638 
2639   PetscFunctionBegin;
2640   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2641   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2642   b = (Mat_MPIAIJ*)B->data;
2643 
2644 #if defined(PETSC_USE_CTABLE)
2645   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2646 #else
2647   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2648 #endif
2649   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2650   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2651   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2652 
2653   /* Because the B will have been resized we simply destroy it and create a new one each time */
2654   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2655   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2656   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2657   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2658   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2659   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2660 
2661   if (!B->preallocated) {
2662     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2663     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2664     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2665     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2666     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2667   }
2668 
2669   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2670   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2671   B->preallocated  = PETSC_TRUE;
2672   B->was_assembled = PETSC_FALSE;
2673   B->assembled     = PETSC_FALSE;;
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2678 {
2679   Mat            mat;
2680   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2681   PetscErrorCode ierr;
2682 
2683   PetscFunctionBegin;
2684   *newmat = 0;
2685   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2686   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2687   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2688   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2689   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2690   a       = (Mat_MPIAIJ*)mat->data;
2691 
2692   mat->factortype   = matin->factortype;
2693   mat->assembled    = PETSC_TRUE;
2694   mat->insertmode   = NOT_SET_VALUES;
2695   mat->preallocated = PETSC_TRUE;
2696 
2697   a->size         = oldmat->size;
2698   a->rank         = oldmat->rank;
2699   a->donotstash   = oldmat->donotstash;
2700   a->roworiented  = oldmat->roworiented;
2701   a->rowindices   = 0;
2702   a->rowvalues    = 0;
2703   a->getrowactive = PETSC_FALSE;
2704 
2705   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2706   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2707 
2708   if (oldmat->colmap) {
2709 #if defined(PETSC_USE_CTABLE)
2710     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2711 #else
2712     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2713     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2714     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2715 #endif
2716   } else a->colmap = 0;
2717   if (oldmat->garray) {
2718     PetscInt len;
2719     len  = oldmat->B->cmap->n;
2720     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2721     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2722     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2723   } else a->garray = 0;
2724 
2725   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2726   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2727   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2728   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2729   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2730   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2731   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2732   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2733   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2734   *newmat = mat;
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 
2739 
2740 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2741 {
2742   PetscScalar    *vals,*svals;
2743   MPI_Comm       comm;
2744   PetscErrorCode ierr;
2745   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2746   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2747   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2748   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2749   PetscInt       cend,cstart,n,*rowners;
2750   int            fd;
2751   PetscInt       bs = newMat->rmap->bs;
2752 
2753   PetscFunctionBegin;
2754   /* force binary viewer to load .info file if it has not yet done so */
2755   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2756   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2757   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2758   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2759   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2760   if (!rank) {
2761     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2762     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2763     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2764   }
2765 
2766   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2767   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2768   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2769   if (bs < 0) bs = 1;
2770 
2771   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2772   M    = header[1]; N = header[2];
2773 
2774   /* If global sizes are set, check if they are consistent with that given in the file */
2775   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2776   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2777 
2778   /* determine ownership of all (block) rows */
2779   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2780   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2781   else m = newMat->rmap->n; /* Set by user */
2782 
2783   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2784   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2785 
2786   /* First process needs enough room for process with most rows */
2787   if (!rank) {
2788     mmax = rowners[1];
2789     for (i=2; i<=size; i++) {
2790       mmax = PetscMax(mmax, rowners[i]);
2791     }
2792   } else mmax = -1;             /* unused, but compilers complain */
2793 
2794   rowners[0] = 0;
2795   for (i=2; i<=size; i++) {
2796     rowners[i] += rowners[i-1];
2797   }
2798   rstart = rowners[rank];
2799   rend   = rowners[rank+1];
2800 
2801   /* distribute row lengths to all processors */
2802   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2803   if (!rank) {
2804     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2805     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2806     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2807     for (j=0; j<m; j++) {
2808       procsnz[0] += ourlens[j];
2809     }
2810     for (i=1; i<size; i++) {
2811       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2812       /* calculate the number of nonzeros on each processor */
2813       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2814         procsnz[i] += rowlengths[j];
2815       }
2816       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2817     }
2818     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2819   } else {
2820     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2821   }
2822 
2823   if (!rank) {
2824     /* determine max buffer needed and allocate it */
2825     maxnz = 0;
2826     for (i=0; i<size; i++) {
2827       maxnz = PetscMax(maxnz,procsnz[i]);
2828     }
2829     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2830 
2831     /* read in my part of the matrix column indices  */
2832     nz   = procsnz[0];
2833     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2834     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2835 
2836     /* read in every one elses and ship off */
2837     for (i=1; i<size; i++) {
2838       nz   = procsnz[i];
2839       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2840       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2841     }
2842     ierr = PetscFree(cols);CHKERRQ(ierr);
2843   } else {
2844     /* determine buffer space needed for message */
2845     nz = 0;
2846     for (i=0; i<m; i++) {
2847       nz += ourlens[i];
2848     }
2849     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2850 
2851     /* receive message of column indices*/
2852     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2853   }
2854 
2855   /* determine column ownership if matrix is not square */
2856   if (N != M) {
2857     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2858     else n = newMat->cmap->n;
2859     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2860     cstart = cend - n;
2861   } else {
2862     cstart = rstart;
2863     cend   = rend;
2864     n      = cend - cstart;
2865   }
2866 
2867   /* loop over local rows, determining number of off diagonal entries */
2868   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2869   jj   = 0;
2870   for (i=0; i<m; i++) {
2871     for (j=0; j<ourlens[i]; j++) {
2872       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2873       jj++;
2874     }
2875   }
2876 
2877   for (i=0; i<m; i++) {
2878     ourlens[i] -= offlens[i];
2879   }
2880   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2881 
2882   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2883 
2884   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2885 
2886   for (i=0; i<m; i++) {
2887     ourlens[i] += offlens[i];
2888   }
2889 
2890   if (!rank) {
2891     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2892 
2893     /* read in my part of the matrix numerical values  */
2894     nz   = procsnz[0];
2895     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2896 
2897     /* insert into matrix */
2898     jj      = rstart;
2899     smycols = mycols;
2900     svals   = vals;
2901     for (i=0; i<m; i++) {
2902       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2903       smycols += ourlens[i];
2904       svals   += ourlens[i];
2905       jj++;
2906     }
2907 
2908     /* read in other processors and ship out */
2909     for (i=1; i<size; i++) {
2910       nz   = procsnz[i];
2911       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2912       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2913     }
2914     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2915   } else {
2916     /* receive numeric values */
2917     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2918 
2919     /* receive message of values*/
2920     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2921 
2922     /* insert into matrix */
2923     jj      = rstart;
2924     smycols = mycols;
2925     svals   = vals;
2926     for (i=0; i<m; i++) {
2927       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2928       smycols += ourlens[i];
2929       svals   += ourlens[i];
2930       jj++;
2931     }
2932   }
2933   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2934   ierr = PetscFree(vals);CHKERRQ(ierr);
2935   ierr = PetscFree(mycols);CHKERRQ(ierr);
2936   ierr = PetscFree(rowners);CHKERRQ(ierr);
2937   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2938   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 /* Not scalable because of ISAllGather() unless getting all columns. */
2943 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2944 {
2945   PetscErrorCode ierr;
2946   IS             iscol_local;
2947   PetscBool      isstride;
2948   PetscMPIInt    lisstride=0,gisstride;
2949 
2950   PetscFunctionBegin;
2951   /* check if we are grabbing all columns*/
2952   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2953 
2954   if (isstride) {
2955     PetscInt  start,len,mstart,mlen;
2956     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2957     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2958     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2959     if (mstart == start && mlen-mstart == len) lisstride = 1;
2960   }
2961 
2962   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2963   if (gisstride) {
2964     PetscInt N;
2965     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2966     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2967     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2968     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2969   } else {
2970     PetscInt cbs;
2971     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2972     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2973     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2974   }
2975 
2976   *isseq = iscol_local;
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 PetscErrorCode ISGetSeqIS_SameDist_Private(Mat mat,IS iscol,IS *iscol_sub,IS *iscmap)
2981 {
2982   PetscErrorCode ierr;
2983   Vec            x,cmap;
2984   const PetscInt *is_idx;
2985   PetscScalar    *xarray,*cmaparray;
2986   PetscInt       n,isstart,*idx,*cmap1,count;
2987   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
2988   Mat            B=a->B;
2989   Vec            lvec = a->lvec,lcmap;
2990   PetscInt       i,j,cstart,cend,Bn=B->cmap->N,*garray = a->garray;
2991   MPI_Comm       comm;
2992   PetscMPIInt    rank;
2993 
2994   PetscFunctionBegin;
2995   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2996   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2997 
2998   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
2999 
3000   /* find isstart */
3001   ierr = MPI_Scan(&n,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3002   isstart -= n;
3003   //printf("[%d] n %d, isstart %d\n",rank,n,isstart);
3004 
3005   /* (1) iscol is a sub-vector of mat, pad it with '-1.' to form a full vector x */
3006   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3007   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3008 
3009   ierr = MatCreateVecs(mat,&cmap,NULL);CHKERRQ(ierr);
3010   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3011 
3012   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3013 
3014   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3015   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3016   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3017   for (i=0; i<n; i++) {
3018     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3019     cmaparray[is_idx[i]-cstart] = i + isstart;
3020   }
3021   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3022   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3023   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3024 
3025   /* (2) scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3026   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3027 
3028   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3029   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3030 
3031   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3032   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3033 
3034   if (lvec->map->n != Bn) SETERRQ2(PETSC_COMM_SELF,0,"n(lvec) %d != Bn %d",lvec->map->n,Bn);
3035   if (rank == -1) {
3036     printf("[%d] lvec:\n",rank);
3037     ierr = VecView(lvec,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3038   }
3039 
3040   /* (3) create scalable iscol_sub (a subset of iscol) and iscmap */
3041   ierr = PetscMalloc2(n+Bn,&idx,n+Bn,&cmap1);CHKERRQ(ierr);
3042   count = 0;
3043 
3044   /* A part */
3045   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3046   j = cstart;
3047   for (i=0; i<n; i++) {
3048     if (j >= cend) break;
3049     if (is_idx[i] == j) {
3050       idx[count]  = j;
3051       cmap1[count] = i + isstart; /* column index in submat */
3052       count++; j++;
3053     } else if (is_idx[i] > j) {
3054       while (is_idx[i] > j && j < cend-1) j++;
3055       if (is_idx[i] == j) {
3056         idx[count]  = j;
3057         cmap1[count] = i + isstart; /* column index in submat */
3058         count++; j++;
3059       }
3060     }
3061   }
3062   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3063 
3064   /* B part */
3065   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3066   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3067   j = 0;
3068   for (i=0; i<Bn; i++) {
3069     if (j >= Bn) break;
3070     if ((PetscInt)xarray[i] == garray[j]) {
3071       idx[count]   = garray[j];
3072       cmap1[count] = cmaparray[i];  /* column index in submat */
3073       count++; j++;
3074     } else if ((PetscInt)xarray[i] > garray[j]) {
3075       while ((PetscInt)xarray[i] > garray[j] && j < Bn-1) j++;
3076       if ((PetscInt)xarray[i] == garray[j]) {
3077         idx[count]  = garray[j];
3078         cmap1[count] = cmaparray[i]; /* column index in submat */
3079         count++; j++;
3080       }
3081     }
3082   }
3083   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3084   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3085   //printf("[%d] n %d + Bn %d = %d; count %d, isN %d, matN %d\n",rank,n,Bn,n+Bn,count,N,mat->cmap->N);
3086 
3087   ierr = PetscSortInt(count,idx);CHKERRQ(ierr);  //should idx be sorted with cmap1???
3088   ierr = PetscSortInt(count,cmap1);CHKERRQ(ierr);
3089 
3090   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_sub);CHKERRQ(ierr);
3091   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,cmap1,PETSC_COPY_VALUES,iscmap);CHKERRQ(ierr);
3092 
3093   if (rank == -1) {
3094     printf("[%d] iscmap:\n",rank);
3095     ierr = ISView(*iscmap,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3096   }
3097 
3098   ierr = PetscFree2(idx,cmap1);CHKERRQ(ierr);
3099   ierr = VecDestroy(&x);CHKERRQ(ierr);
3100   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3101   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 extern PetscErrorCode MatCreateSubMatrix_MPIAIJ_Private_SameDist(Mat,IS,IS,IS,PetscInt,MatReuse,Mat*);
3106 
3107 /* isrow has same processor distribution as mat, avoid iscol_local which uses O(mat->cmap->N) ctable */
3108 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3109 {
3110   PetscErrorCode ierr;
3111   IS             iscol_local;
3112   PetscInt       csize;
3113   MPI_Comm       comm;
3114   PetscMPIInt    rank;
3115   PetscBool      sorted,sameColDist = PETSC_FALSE,tsameColDist;
3116   PetscInt       n,i,j,cstart,cend,N;
3117 
3118   PetscFunctionBegin;
3119   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3120   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3121   //ierr = ISView(iscol,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3122 
3123   /* If iscol has same processor distribution as mat, then use a scalable routine */
3124   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); //must be locally sorted???
3125   if (!sorted) printf("[%d] iscol !sorted\n",rank);
3126 
3127   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3128   ierr = ISGetSize(iscol,&N);CHKERRQ(ierr);
3129   if (!n) {
3130     sameColDist = PETSC_TRUE;
3131   } else {
3132     ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3133     ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3134     if (i >= cstart && j < cend) sameColDist = PETSC_TRUE;
3135   }
3136   ierr = MPIU_Allreduce(&sameColDist,&tsameColDist,1,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3137   if (!rank) printf("[%d]..SubMatrix_MPIAIJ_SameDist ...reuse %d, sameColDist for column %d \n",rank,call,tsameColDist);
3138 
3139   if (tsameColDist) { /* get iscol_sub without calling ISAllGather() */
3140     IS iscol_sub,iscmap;
3141     ierr = ISGetSeqIS_SameDist_Private(mat,iscol,&iscol_sub,&iscmap);CHKERRQ(ierr);
3142 
3143     if (rank == -1) {
3144       printf("[%d] iscol_sub:\n",rank);
3145       ierr = ISView(iscol_sub,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3146 
3147       //printf("[%d] iscmap:\n",rank);
3148       //ierr = ISView(iscmap,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3149     }
3150     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3151     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3152   }
3153   //-----------------------------------------------
3154 
3155   /* Get nonscalable iscol_local -- will replace it with iscol_sub! */
3156   if (call == MAT_REUSE_MATRIX) {
3157     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3158     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3159   } else {
3160     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3161     PetscBool      sorted;
3162     ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3163     if (!sorted) printf("[%d] iscol_local !sorted\n",rank);
3164   }
3165 
3166   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3167   ierr = MatCreateSubMatrix_MPIAIJ_Private_SameDist(mat,isrow,iscol,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3168 
3169   if (call == MAT_INITIAL_MATRIX) {
3170     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3171     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3172   }
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3177 {
3178   PetscErrorCode ierr;
3179   IS             iscol_local;
3180   PetscInt       csize;
3181   PetscInt       n,i,j,rstart,rend;
3182   PetscBool      sameRowDist=PETSC_FALSE,tsameRowDist;
3183   MPI_Comm       comm;
3184 
3185   PetscFunctionBegin;
3186   /* If isrow has same processor distribution as mat, then use a scalable routine */
3187   if (call == MAT_REUSE_MATRIX) {
3188     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3189     if (iscol_local) tsameRowDist = PETSC_TRUE;
3190   } else {
3191     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3192     if (!n) {
3193       sameRowDist = PETSC_TRUE;
3194     } else {
3195       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3196       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
3197       if (i >= rstart && j < rend) sameRowDist = PETSC_TRUE;
3198     }
3199     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3200     ierr = MPIU_Allreduce(&sameRowDist,&tsameRowDist,1,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3201   }
3202 
3203   if (tsameRowDist) {
3204     ierr = MatCreateSubMatrix_MPIAIJ_SameDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3205     PetscFunctionReturn(0);
3206   }
3207 
3208   /* General case                            */
3209   /* --------------------------------------- */
3210   if (call == MAT_REUSE_MATRIX) {
3211     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3212     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3213   } else {
3214     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3215   }
3216 
3217   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3218   ierr = MatCreateSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3219 
3220   if (call == MAT_INITIAL_MATRIX) {
3221     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3222     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3223   }
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*);
3228 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3229 
3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ_Private_SameDist(Mat mat,IS isrow,IS iscolmpi,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3231 {
3232   PetscErrorCode ierr;
3233   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3234   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3235   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3236   Mat            M,Msub,B=a->B;
3237   MatScalar      *aa;
3238   Mat_SeqAIJ     *aij;
3239   PetscInt       *garray = a->garray,*colsub;
3240   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3241   IS             iscol_sub,iscmap;
3242   const PetscInt *is_idx,*cmap;
3243 
3244   PetscFunctionBegin;
3245   MPI_Comm       comm;
3246   PetscMPIInt    rank;
3247   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3249   //printf("SubMatrix_MPIAIJ_Private_SameDist ...\n");
3250   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); /* iscol should be sequential */
3251 
3252   if (call == MAT_INITIAL_MATRIX) {
3253     /* create scalable iscol_sub (a subset of iscol) */
3254     PetscInt *idx,*cmap1;
3255 
3256     PetscBool      sorted;
3257     ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
3258     if (!sorted) printf("iscol_local !sorted\n");
3259 
3260     ierr = PetscMalloc2(n,&idx,n,&cmap1);CHKERRQ(ierr);
3261     ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3262     count = 0;
3263 
3264     /* A part */
3265     j = cstart;
3266     for (i=0; i<n; i++) {
3267       if (j >= cend) break;
3268       if (is_idx[i] == j) {
3269         idx[count]  = j;
3270         cmap1[count] = i; /* column index in submat */
3271         count++; j++;
3272       } else if (is_idx[i] > j) {
3273         while (is_idx[i] > j && j < cend-1) j++;
3274         if (is_idx[i] == j) {
3275           idx[count]  = j;
3276           cmap1[count] = i; /* column index in submat */
3277           count++; j++;
3278         }
3279       }
3280     }
3281 
3282     /* B part */
3283     j = 0;
3284     for (i=0; i<n; i++) {
3285       if (j >= Bn) break;
3286       if (is_idx[i] == garray[j]) {
3287         idx[count]  = garray[j];
3288         cmap1[count] = i;  /* column index in submat */
3289         count++; j++;
3290       } else if (is_idx[i] > garray[j]) {
3291         while (is_idx[i] > garray[j] && j < Bn-1) j++;
3292         if (is_idx[i] == garray[j]) {
3293           idx[count]  = garray[j];
3294           cmap1[count] = i; /* column index in submat */
3295           count++; j++;
3296         }
3297       }
3298     }
3299     ierr = PetscSortInt(count,cmap1);CHKERRQ(ierr);
3300     ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3301 
3302     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol),count,idx,PETSC_COPY_VALUES,&iscol_sub);CHKERRQ(ierr);
3303     ierr = ISSort(iscol_sub);CHKERRQ(ierr);
3304 
3305     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol),count,cmap1,PETSC_COPY_VALUES,&iscmap);CHKERRQ(ierr);
3306     ierr = PetscFree2(idx,cmap1);CHKERRQ(ierr);
3307 
3308     //printf("\n[%d] old count %d, isN %d, matN %d\n",rank,count,n,mat->cmap->N);
3309     if (rank == -1) {
3310       printf("\n[%d] old iscol_sub:\n",rank);
3311       ierr = ISView(iscol_sub,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3312 
3313       //printf("\n[%d] old iscmap:\n",rank);
3314       //ierr = ISView(iscmap,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
3315     }
3316 
3317     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3318 
3319   } else { /* call ==  MAT_REUSE_MATRIX */
3320     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3321     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3322     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3323 
3324     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3325     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3326 
3327     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3328     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3329 
3330     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3331   }
3332 
3333   aij = (Mat_SeqAIJ*)(Msub)->data;
3334   ii  = aij->i;
3335   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3336 
3337   /*
3338       m - number of local rows
3339       n - number of columns (same on all processors)
3340       rstart - first row in new global matrix generated
3341   */
3342   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3343 
3344   if (call == MAT_INITIAL_MATRIX) {
3345     MPI_Comm       comm;
3346     PetscMPIInt    rank,size;
3347 
3348     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3349     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3350     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3351 
3352     /*
3353         Determine the number of non-zeros in the diagonal and off-diagonal
3354         portions of the matrix in order to do correct preallocation
3355     */
3356 
3357     /* first get start and end of "diagonal" columns */
3358     if (csize == PETSC_DECIDE) {
3359       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3360       if (mglobal == n) { /* square matrix */
3361         nlocal = m;
3362       } else {
3363         nlocal = n/size + ((n % size) > rank);
3364       }
3365     } else {
3366       nlocal = csize;
3367     }
3368     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3369     rstart = rend - nlocal;
3370     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3371 
3372     /* next, compute all the lengths */
3373     jj    = aij->j;
3374     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3375     olens = dlens + m;
3376     for (i=0; i<m; i++) {
3377       jend = ii[i+1] - ii[i];
3378       olen = 0;
3379       dlen = 0;
3380       for (j=0; j<jend; j++) {
3381         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3382         else dlen++;
3383         jj++;
3384       }
3385       olens[i] = olen;
3386       dlens[i] = dlen;
3387     }
3388     ierr = MatGetBlockSizes(Msub,&bs,&cbs);CHKERRQ(ierr);
3389 
3390     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3391     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3392     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3393     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3394     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3395     ierr = PetscFree(dlens);CHKERRQ(ierr);
3396   } else {
3397     M    = *newmat;
3398     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3399     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3400     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3401     /*
3402          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3403        rather than the slower MatSetValues().
3404     */
3405     M->was_assembled = PETSC_TRUE;
3406     M->assembled     = PETSC_FALSE;
3407   }
3408 
3409   /* set values of Msub to *newmat */
3410   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3411   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3412 
3413   jj   = aij->j;
3414   aa   = aij->a;
3415   for (i=0; i<m; i++) {
3416     row = rstart + i;
3417     nz  = ii[i+1] - ii[i];
3418     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3419     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3420     jj += nz; aa += nz;
3421   }
3422   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3423 
3424   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3425   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3426   *newmat = M;
3427 
3428   ierr = PetscFree(colsub);CHKERRQ(ierr);
3429 
3430   /* save Msub, iscol_sub and iscmap used in processor for next request */
3431   if (call ==  MAT_INITIAL_MATRIX) {
3432     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3433     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3434 
3435     ierr = PetscObjectCompose((PetscObject)M,"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3436     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3437 
3438     ierr = PetscObjectCompose((PetscObject)M,"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3439     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3440   }
3441   PetscFunctionReturn(0);
3442 }
3443 
3444 /*
3445     Not great since it makes two copies of the submatrix, first an SeqAIJ
3446   in local and then by concatenating the local matrices the end result.
3447   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3448 
3449   Note: This requires a sequential iscol with all indices.
3450 */
3451 PetscErrorCode MatCreateSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3452 {
3453   PetscErrorCode ierr;
3454   PetscMPIInt    rank,size;
3455   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3456   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3457   Mat            M,Mreuse;
3458   MatScalar      *aa,*vwork;
3459   MPI_Comm       comm;
3460   Mat_SeqAIJ     *aij;
3461 
3462   PetscFunctionBegin;
3463   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3464   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3465   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3466 
3467   if (call ==  MAT_REUSE_MATRIX) {
3468     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3469     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3470     ierr = MatCreateSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
3471   } else {
3472     ierr = MatCreateSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
3473   }
3474 
3475   /*
3476       m - number of local rows
3477       n - number of columns (same on all processors)
3478       rstart - first row in new global matrix generated
3479   */
3480   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3481   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3482   if (call == MAT_INITIAL_MATRIX) {
3483     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3484     ii  = aij->i;
3485     jj  = aij->j;
3486 
3487     /*
3488         Determine the number of non-zeros in the diagonal and off-diagonal
3489         portions of the matrix in order to do correct preallocation
3490     */
3491 
3492     /* first get start and end of "diagonal" columns */
3493     if (csize == PETSC_DECIDE) {
3494       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3495       if (mglobal == n) { /* square matrix */
3496         nlocal = m;
3497       } else {
3498         nlocal = n/size + ((n % size) > rank);
3499       }
3500     } else {
3501       nlocal = csize;
3502     }
3503     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3504     rstart = rend - nlocal;
3505     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3506 
3507     /* next, compute all the lengths */
3508     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3509     olens = dlens + m;
3510     for (i=0; i<m; i++) {
3511       jend = ii[i+1] - ii[i];
3512       olen = 0;
3513       dlen = 0;
3514       for (j=0; j<jend; j++) {
3515         if (*jj < rstart || *jj >= rend) olen++;
3516         else dlen++;
3517         jj++;
3518       }
3519       olens[i] = olen;
3520       dlens[i] = dlen;
3521     }
3522     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3523     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3524     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3525     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3526     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3527     ierr = PetscFree(dlens);CHKERRQ(ierr);
3528   } else {
3529     PetscInt ml,nl;
3530 
3531     M    = *newmat;
3532     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3533     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3534     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3535     /*
3536          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3537        rather than the slower MatSetValues().
3538     */
3539     M->was_assembled = PETSC_TRUE;
3540     M->assembled     = PETSC_FALSE;
3541   }
3542   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3543   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3544   ii   = aij->i;
3545   jj   = aij->j;
3546   aa   = aij->a;
3547   for (i=0; i<m; i++) {
3548     row   = rstart + i;
3549     nz    = ii[i+1] - ii[i];
3550     cwork = jj;     jj += nz;
3551     vwork = aa;     aa += nz;
3552     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3553   }
3554 
3555   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3556   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3557   *newmat = M;
3558 
3559   /* save submatrix used in processor for next request */
3560   if (call ==  MAT_INITIAL_MATRIX) {
3561     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3562     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3563   }
3564   PetscFunctionReturn(0);
3565 }
3566 
3567 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3568 {
3569   PetscInt       m,cstart, cend,j,nnz,i,d;
3570   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3571   const PetscInt *JJ;
3572   PetscScalar    *values;
3573   PetscErrorCode ierr;
3574   PetscBool      nooffprocentries;
3575 
3576   PetscFunctionBegin;
3577   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3578 
3579   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3580   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3581   m      = B->rmap->n;
3582   cstart = B->cmap->rstart;
3583   cend   = B->cmap->rend;
3584   rstart = B->rmap->rstart;
3585 
3586   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3587 
3588 #if defined(PETSC_USE_DEBUGGING)
3589   for (i=0; i<m; i++) {
3590     nnz = Ii[i+1]- Ii[i];
3591     JJ  = J + Ii[i];
3592     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3593     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3594     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3595   }
3596 #endif
3597 
3598   for (i=0; i<m; i++) {
3599     nnz     = Ii[i+1]- Ii[i];
3600     JJ      = J + Ii[i];
3601     nnz_max = PetscMax(nnz_max,nnz);
3602     d       = 0;
3603     for (j=0; j<nnz; j++) {
3604       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3605     }
3606     d_nnz[i] = d;
3607     o_nnz[i] = nnz - d;
3608   }
3609   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3610   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3611 
3612   if (v) values = (PetscScalar*)v;
3613   else {
3614     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3615   }
3616 
3617   for (i=0; i<m; i++) {
3618     ii   = i + rstart;
3619     nnz  = Ii[i+1]- Ii[i];
3620     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3621   }
3622   nooffprocentries    = B->nooffprocentries;
3623   B->nooffprocentries = PETSC_TRUE;
3624   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3625   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3626   B->nooffprocentries = nooffprocentries;
3627 
3628   if (!v) {
3629     ierr = PetscFree(values);CHKERRQ(ierr);
3630   }
3631   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3632   PetscFunctionReturn(0);
3633 }
3634 
3635 /*@
3636    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3637    (the default parallel PETSc format).
3638 
3639    Collective on MPI_Comm
3640 
3641    Input Parameters:
3642 +  B - the matrix
3643 .  i - the indices into j for the start of each local row (starts with zero)
3644 .  j - the column indices for each local row (starts with zero)
3645 -  v - optional values in the matrix
3646 
3647    Level: developer
3648 
3649    Notes:
3650        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3651      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3652      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3653 
3654        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3655 
3656        The format which is used for the sparse matrix input, is equivalent to a
3657     row-major ordering.. i.e for the following matrix, the input data expected is
3658     as shown
3659 
3660 $        1 0 0
3661 $        2 0 3     P0
3662 $       -------
3663 $        4 5 6     P1
3664 $
3665 $     Process0 [P0]: rows_owned=[0,1]
3666 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3667 $        j =  {0,0,2}  [size = 3]
3668 $        v =  {1,2,3}  [size = 3]
3669 $
3670 $     Process1 [P1]: rows_owned=[2]
3671 $        i =  {0,3}    [size = nrow+1  = 1+1]
3672 $        j =  {0,1,2}  [size = 3]
3673 $        v =  {4,5,6}  [size = 3]
3674 
3675 .keywords: matrix, aij, compressed row, sparse, parallel
3676 
3677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3678           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3679 @*/
3680 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3681 {
3682   PetscErrorCode ierr;
3683 
3684   PetscFunctionBegin;
3685   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3686   PetscFunctionReturn(0);
3687 }
3688 
3689 /*@C
3690    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3691    (the default parallel PETSc format).  For good matrix assembly performance
3692    the user should preallocate the matrix storage by setting the parameters
3693    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3694    performance can be increased by more than a factor of 50.
3695 
3696    Collective on MPI_Comm
3697 
3698    Input Parameters:
3699 +  B - the matrix
3700 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3701            (same value is used for all local rows)
3702 .  d_nnz - array containing the number of nonzeros in the various rows of the
3703            DIAGONAL portion of the local submatrix (possibly different for each row)
3704            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3705            The size of this array is equal to the number of local rows, i.e 'm'.
3706            For matrices that will be factored, you must leave room for (and set)
3707            the diagonal entry even if it is zero.
3708 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3709            submatrix (same value is used for all local rows).
3710 -  o_nnz - array containing the number of nonzeros in the various rows of the
3711            OFF-DIAGONAL portion of the local submatrix (possibly different for
3712            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3713            structure. The size of this array is equal to the number
3714            of local rows, i.e 'm'.
3715 
3716    If the *_nnz parameter is given then the *_nz parameter is ignored
3717 
3718    The AIJ format (also called the Yale sparse matrix format or
3719    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3720    storage.  The stored row and column indices begin with zero.
3721    See Users-Manual: ch_mat for details.
3722 
3723    The parallel matrix is partitioned such that the first m0 rows belong to
3724    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3725    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3726 
3727    The DIAGONAL portion of the local submatrix of a processor can be defined
3728    as the submatrix which is obtained by extraction the part corresponding to
3729    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3730    first row that belongs to the processor, r2 is the last row belonging to
3731    the this processor, and c1-c2 is range of indices of the local part of a
3732    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3733    common case of a square matrix, the row and column ranges are the same and
3734    the DIAGONAL part is also square. The remaining portion of the local
3735    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3736 
3737    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3738 
3739    You can call MatGetInfo() to get information on how effective the preallocation was;
3740    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3741    You can also run with the option -info and look for messages with the string
3742    malloc in them to see if additional memory allocation was needed.
3743 
3744    Example usage:
3745 
3746    Consider the following 8x8 matrix with 34 non-zero values, that is
3747    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3748    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3749    as follows:
3750 
3751 .vb
3752             1  2  0  |  0  3  0  |  0  4
3753     Proc0   0  5  6  |  7  0  0  |  8  0
3754             9  0 10  | 11  0  0  | 12  0
3755     -------------------------------------
3756            13  0 14  | 15 16 17  |  0  0
3757     Proc1   0 18  0  | 19 20 21  |  0  0
3758             0  0  0  | 22 23  0  | 24  0
3759     -------------------------------------
3760     Proc2  25 26 27  |  0  0 28  | 29  0
3761            30  0  0  | 31 32 33  |  0 34
3762 .ve
3763 
3764    This can be represented as a collection of submatrices as:
3765 
3766 .vb
3767       A B C
3768       D E F
3769       G H I
3770 .ve
3771 
3772    Where the submatrices A,B,C are owned by proc0, D,E,F are
3773    owned by proc1, G,H,I are owned by proc2.
3774 
3775    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3776    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3777    The 'M','N' parameters are 8,8, and have the same values on all procs.
3778 
3779    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3780    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3781    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3782    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3783    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3784    matrix, ans [DF] as another SeqAIJ matrix.
3785 
3786    When d_nz, o_nz parameters are specified, d_nz storage elements are
3787    allocated for every row of the local diagonal submatrix, and o_nz
3788    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3789    One way to choose d_nz and o_nz is to use the max nonzerors per local
3790    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3791    In this case, the values of d_nz,o_nz are:
3792 .vb
3793      proc0 : dnz = 2, o_nz = 2
3794      proc1 : dnz = 3, o_nz = 2
3795      proc2 : dnz = 1, o_nz = 4
3796 .ve
3797    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3798    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3799    for proc3. i.e we are using 12+15+10=37 storage locations to store
3800    34 values.
3801 
3802    When d_nnz, o_nnz parameters are specified, the storage is specified
3803    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3804    In the above case the values for d_nnz,o_nnz are:
3805 .vb
3806      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3807      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3808      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3809 .ve
3810    Here the space allocated is sum of all the above values i.e 34, and
3811    hence pre-allocation is perfect.
3812 
3813    Level: intermediate
3814 
3815 .keywords: matrix, aij, compressed row, sparse, parallel
3816 
3817 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3818           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3819 @*/
3820 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3821 {
3822   PetscErrorCode ierr;
3823 
3824   PetscFunctionBegin;
3825   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3826   PetscValidType(B,1);
3827   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 /*@
3832      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3833          CSR format the local rows.
3834 
3835    Collective on MPI_Comm
3836 
3837    Input Parameters:
3838 +  comm - MPI communicator
3839 .  m - number of local rows (Cannot be PETSC_DECIDE)
3840 .  n - This value should be the same as the local size used in creating the
3841        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3842        calculated if N is given) For square matrices n is almost always m.
3843 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3844 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3845 .   i - row indices
3846 .   j - column indices
3847 -   a - matrix values
3848 
3849    Output Parameter:
3850 .   mat - the matrix
3851 
3852    Level: intermediate
3853 
3854    Notes:
3855        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3856      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3857      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3858 
3859        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3860 
3861        The format which is used for the sparse matrix input, is equivalent to a
3862     row-major ordering.. i.e for the following matrix, the input data expected is
3863     as shown
3864 
3865 $        1 0 0
3866 $        2 0 3     P0
3867 $       -------
3868 $        4 5 6     P1
3869 $
3870 $     Process0 [P0]: rows_owned=[0,1]
3871 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3872 $        j =  {0,0,2}  [size = 3]
3873 $        v =  {1,2,3}  [size = 3]
3874 $
3875 $     Process1 [P1]: rows_owned=[2]
3876 $        i =  {0,3}    [size = nrow+1  = 1+1]
3877 $        j =  {0,1,2}  [size = 3]
3878 $        v =  {4,5,6}  [size = 3]
3879 
3880 .keywords: matrix, aij, compressed row, sparse, parallel
3881 
3882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3883           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3884 @*/
3885 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3886 {
3887   PetscErrorCode ierr;
3888 
3889   PetscFunctionBegin;
3890   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3891   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3892   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3893   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3894   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3895   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3896   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 /*@C
3901    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3902    (the default parallel PETSc format).  For good matrix assembly performance
3903    the user should preallocate the matrix storage by setting the parameters
3904    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3905    performance can be increased by more than a factor of 50.
3906 
3907    Collective on MPI_Comm
3908 
3909    Input Parameters:
3910 +  comm - MPI communicator
3911 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3912            This value should be the same as the local size used in creating the
3913            y vector for the matrix-vector product y = Ax.
3914 .  n - This value should be the same as the local size used in creating the
3915        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3916        calculated if N is given) For square matrices n is almost always m.
3917 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3918 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3919 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3920            (same value is used for all local rows)
3921 .  d_nnz - array containing the number of nonzeros in the various rows of the
3922            DIAGONAL portion of the local submatrix (possibly different for each row)
3923            or NULL, if d_nz is used to specify the nonzero structure.
3924            The size of this array is equal to the number of local rows, i.e 'm'.
3925 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3926            submatrix (same value is used for all local rows).
3927 -  o_nnz - array containing the number of nonzeros in the various rows of the
3928            OFF-DIAGONAL portion of the local submatrix (possibly different for
3929            each row) or NULL, if o_nz is used to specify the nonzero
3930            structure. The size of this array is equal to the number
3931            of local rows, i.e 'm'.
3932 
3933    Output Parameter:
3934 .  A - the matrix
3935 
3936    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3937    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3938    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3939 
3940    Notes:
3941    If the *_nnz parameter is given then the *_nz parameter is ignored
3942 
3943    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3944    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3945    storage requirements for this matrix.
3946 
3947    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3948    processor than it must be used on all processors that share the object for
3949    that argument.
3950 
3951    The user MUST specify either the local or global matrix dimensions
3952    (possibly both).
3953 
3954    The parallel matrix is partitioned across processors such that the
3955    first m0 rows belong to process 0, the next m1 rows belong to
3956    process 1, the next m2 rows belong to process 2 etc.. where
3957    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3958    values corresponding to [m x N] submatrix.
3959 
3960    The columns are logically partitioned with the n0 columns belonging
3961    to 0th partition, the next n1 columns belonging to the next
3962    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3963 
3964    The DIAGONAL portion of the local submatrix on any given processor
3965    is the submatrix corresponding to the rows and columns m,n
3966    corresponding to the given processor. i.e diagonal matrix on
3967    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3968    etc. The remaining portion of the local submatrix [m x (N-n)]
3969    constitute the OFF-DIAGONAL portion. The example below better
3970    illustrates this concept.
3971 
3972    For a square global matrix we define each processor's diagonal portion
3973    to be its local rows and the corresponding columns (a square submatrix);
3974    each processor's off-diagonal portion encompasses the remainder of the
3975    local matrix (a rectangular submatrix).
3976 
3977    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3978 
3979    When calling this routine with a single process communicator, a matrix of
3980    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3981    type of communicator, use the construction mechanism:
3982      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3983 
3984    By default, this format uses inodes (identical nodes) when possible.
3985    We search for consecutive rows with the same nonzero structure, thereby
3986    reusing matrix information to achieve increased efficiency.
3987 
3988    Options Database Keys:
3989 +  -mat_no_inode  - Do not use inodes
3990 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3991 -  -mat_aij_oneindex - Internally use indexing starting at 1
3992         rather than 0.  Note that when calling MatSetValues(),
3993         the user still MUST index entries starting at 0!
3994 
3995 
3996    Example usage:
3997 
3998    Consider the following 8x8 matrix with 34 non-zero values, that is
3999    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4000    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4001    as follows:
4002 
4003 .vb
4004             1  2  0  |  0  3  0  |  0  4
4005     Proc0   0  5  6  |  7  0  0  |  8  0
4006             9  0 10  | 11  0  0  | 12  0
4007     -------------------------------------
4008            13  0 14  | 15 16 17  |  0  0
4009     Proc1   0 18  0  | 19 20 21  |  0  0
4010             0  0  0  | 22 23  0  | 24  0
4011     -------------------------------------
4012     Proc2  25 26 27  |  0  0 28  | 29  0
4013            30  0  0  | 31 32 33  |  0 34
4014 .ve
4015 
4016    This can be represented as a collection of submatrices as:
4017 
4018 .vb
4019       A B C
4020       D E F
4021       G H I
4022 .ve
4023 
4024    Where the submatrices A,B,C are owned by proc0, D,E,F are
4025    owned by proc1, G,H,I are owned by proc2.
4026 
4027    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4028    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4029    The 'M','N' parameters are 8,8, and have the same values on all procs.
4030 
4031    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4032    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4033    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4034    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4035    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4036    matrix, ans [DF] as another SeqAIJ matrix.
4037 
4038    When d_nz, o_nz parameters are specified, d_nz storage elements are
4039    allocated for every row of the local diagonal submatrix, and o_nz
4040    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4041    One way to choose d_nz and o_nz is to use the max nonzerors per local
4042    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4043    In this case, the values of d_nz,o_nz are:
4044 .vb
4045      proc0 : dnz = 2, o_nz = 2
4046      proc1 : dnz = 3, o_nz = 2
4047      proc2 : dnz = 1, o_nz = 4
4048 .ve
4049    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4050    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4051    for proc3. i.e we are using 12+15+10=37 storage locations to store
4052    34 values.
4053 
4054    When d_nnz, o_nnz parameters are specified, the storage is specified
4055    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4056    In the above case the values for d_nnz,o_nnz are:
4057 .vb
4058      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4059      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4060      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4061 .ve
4062    Here the space allocated is sum of all the above values i.e 34, and
4063    hence pre-allocation is perfect.
4064 
4065    Level: intermediate
4066 
4067 .keywords: matrix, aij, compressed row, sparse, parallel
4068 
4069 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4070           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4071 @*/
4072 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4073 {
4074   PetscErrorCode ierr;
4075   PetscMPIInt    size;
4076 
4077   PetscFunctionBegin;
4078   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4079   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4081   if (size > 1) {
4082     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4083     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4084   } else {
4085     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4086     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4087   }
4088   PetscFunctionReturn(0);
4089 }
4090 
4091 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4092 {
4093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4094   PetscBool      flg;
4095   PetscErrorCode ierr;
4096 
4097   PetscFunctionBegin;
4098   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4099   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4100   if (Ad)     *Ad     = a->A;
4101   if (Ao)     *Ao     = a->B;
4102   if (colmap) *colmap = a->garray;
4103   PetscFunctionReturn(0);
4104 }
4105 
4106 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4107 {
4108   PetscErrorCode ierr;
4109   PetscInt       m,N,i,rstart,nnz,Ii;
4110   PetscInt       *indx;
4111   PetscScalar    *values;
4112 
4113   PetscFunctionBegin;
4114   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4115   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4116     PetscInt       *dnz,*onz,sum,bs,cbs;
4117 
4118     if (n == PETSC_DECIDE) {
4119       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4120     }
4121     /* Check sum(n) = N */
4122     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4123     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4124 
4125     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4126     rstart -= m;
4127 
4128     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4129     for (i=0; i<m; i++) {
4130       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4131       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4132       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4133     }
4134 
4135     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4136     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4137     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4138     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4139     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4140     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4141     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4142     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4143   }
4144 
4145   /* numeric phase */
4146   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4147   for (i=0; i<m; i++) {
4148     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4149     Ii   = i + rstart;
4150     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4151     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4152   }
4153   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4154   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4159 {
4160   PetscErrorCode    ierr;
4161   PetscMPIInt       rank;
4162   PetscInt          m,N,i,rstart,nnz;
4163   size_t            len;
4164   const PetscInt    *indx;
4165   PetscViewer       out;
4166   char              *name;
4167   Mat               B;
4168   const PetscScalar *values;
4169 
4170   PetscFunctionBegin;
4171   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4172   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4173   /* Should this be the type of the diagonal block of A? */
4174   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4175   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4176   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4177   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4178   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4179   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4180   for (i=0; i<m; i++) {
4181     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4182     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4183     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4184   }
4185   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4186   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4187 
4188   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4189   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4190   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4191   sprintf(name,"%s.%d",outfile,rank);
4192   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4193   ierr = PetscFree(name);CHKERRQ(ierr);
4194   ierr = MatView(B,out);CHKERRQ(ierr);
4195   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4196   ierr = MatDestroy(&B);CHKERRQ(ierr);
4197   PetscFunctionReturn(0);
4198 }
4199 
4200 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4201 {
4202   PetscErrorCode      ierr;
4203   Mat_Merge_SeqsToMPI *merge;
4204   PetscContainer      container;
4205 
4206   PetscFunctionBegin;
4207   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4208   if (container) {
4209     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4210     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4211     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4212     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4213     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4214     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4215     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4216     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4217     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4218     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4219     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4220     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4221     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4222     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4223     ierr = PetscFree(merge);CHKERRQ(ierr);
4224     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4225   }
4226   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 #include <../src/mat/utils/freespace.h>
4231 #include <petscbt.h>
4232 
4233 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4234 {
4235   PetscErrorCode      ierr;
4236   MPI_Comm            comm;
4237   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4238   PetscMPIInt         size,rank,taga,*len_s;
4239   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4240   PetscInt            proc,m;
4241   PetscInt            **buf_ri,**buf_rj;
4242   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4243   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4244   MPI_Request         *s_waits,*r_waits;
4245   MPI_Status          *status;
4246   MatScalar           *aa=a->a;
4247   MatScalar           **abuf_r,*ba_i;
4248   Mat_Merge_SeqsToMPI *merge;
4249   PetscContainer      container;
4250 
4251   PetscFunctionBegin;
4252   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4253   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4254 
4255   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4256   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4257 
4258   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4259   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4260 
4261   bi     = merge->bi;
4262   bj     = merge->bj;
4263   buf_ri = merge->buf_ri;
4264   buf_rj = merge->buf_rj;
4265 
4266   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4267   owners = merge->rowmap->range;
4268   len_s  = merge->len_s;
4269 
4270   /* send and recv matrix values */
4271   /*-----------------------------*/
4272   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4273   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4274 
4275   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4276   for (proc=0,k=0; proc<size; proc++) {
4277     if (!len_s[proc]) continue;
4278     i    = owners[proc];
4279     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4280     k++;
4281   }
4282 
4283   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4284   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4285   ierr = PetscFree(status);CHKERRQ(ierr);
4286 
4287   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4288   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4289 
4290   /* insert mat values of mpimat */
4291   /*----------------------------*/
4292   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4293   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4294 
4295   for (k=0; k<merge->nrecv; k++) {
4296     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4297     nrows       = *(buf_ri_k[k]);
4298     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4299     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4300   }
4301 
4302   /* set values of ba */
4303   m = merge->rowmap->n;
4304   for (i=0; i<m; i++) {
4305     arow = owners[rank] + i;
4306     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4307     bnzi = bi[i+1] - bi[i];
4308     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4309 
4310     /* add local non-zero vals of this proc's seqmat into ba */
4311     anzi   = ai[arow+1] - ai[arow];
4312     aj     = a->j + ai[arow];
4313     aa     = a->a + ai[arow];
4314     nextaj = 0;
4315     for (j=0; nextaj<anzi; j++) {
4316       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4317         ba_i[j] += aa[nextaj++];
4318       }
4319     }
4320 
4321     /* add received vals into ba */
4322     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4323       /* i-th row */
4324       if (i == *nextrow[k]) {
4325         anzi   = *(nextai[k]+1) - *nextai[k];
4326         aj     = buf_rj[k] + *(nextai[k]);
4327         aa     = abuf_r[k] + *(nextai[k]);
4328         nextaj = 0;
4329         for (j=0; nextaj<anzi; j++) {
4330           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4331             ba_i[j] += aa[nextaj++];
4332           }
4333         }
4334         nextrow[k]++; nextai[k]++;
4335       }
4336     }
4337     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4338   }
4339   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4340   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4341 
4342   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4343   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4344   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4345   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4346   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4347   PetscFunctionReturn(0);
4348 }
4349 
4350 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4351 {
4352   PetscErrorCode      ierr;
4353   Mat                 B_mpi;
4354   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4355   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4356   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4357   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4358   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4359   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4360   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4361   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4362   MPI_Status          *status;
4363   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4364   PetscBT             lnkbt;
4365   Mat_Merge_SeqsToMPI *merge;
4366   PetscContainer      container;
4367 
4368   PetscFunctionBegin;
4369   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4370 
4371   /* make sure it is a PETSc comm */
4372   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4373   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4374   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4375 
4376   ierr = PetscNew(&merge);CHKERRQ(ierr);
4377   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4378 
4379   /* determine row ownership */
4380   /*---------------------------------------------------------*/
4381   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4382   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4383   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4384   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4385   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4386   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4387   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4388 
4389   m      = merge->rowmap->n;
4390   owners = merge->rowmap->range;
4391 
4392   /* determine the number of messages to send, their lengths */
4393   /*---------------------------------------------------------*/
4394   len_s = merge->len_s;
4395 
4396   len          = 0; /* length of buf_si[] */
4397   merge->nsend = 0;
4398   for (proc=0; proc<size; proc++) {
4399     len_si[proc] = 0;
4400     if (proc == rank) {
4401       len_s[proc] = 0;
4402     } else {
4403       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4404       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4405     }
4406     if (len_s[proc]) {
4407       merge->nsend++;
4408       nrows = 0;
4409       for (i=owners[proc]; i<owners[proc+1]; i++) {
4410         if (ai[i+1] > ai[i]) nrows++;
4411       }
4412       len_si[proc] = 2*(nrows+1);
4413       len         += len_si[proc];
4414     }
4415   }
4416 
4417   /* determine the number and length of messages to receive for ij-structure */
4418   /*-------------------------------------------------------------------------*/
4419   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4420   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4421 
4422   /* post the Irecv of j-structure */
4423   /*-------------------------------*/
4424   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4425   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4426 
4427   /* post the Isend of j-structure */
4428   /*--------------------------------*/
4429   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4430 
4431   for (proc=0, k=0; proc<size; proc++) {
4432     if (!len_s[proc]) continue;
4433     i    = owners[proc];
4434     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4435     k++;
4436   }
4437 
4438   /* receives and sends of j-structure are complete */
4439   /*------------------------------------------------*/
4440   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4441   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4442 
4443   /* send and recv i-structure */
4444   /*---------------------------*/
4445   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4446   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4447 
4448   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4449   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4450   for (proc=0,k=0; proc<size; proc++) {
4451     if (!len_s[proc]) continue;
4452     /* form outgoing message for i-structure:
4453          buf_si[0]:                 nrows to be sent
4454                [1:nrows]:           row index (global)
4455                [nrows+1:2*nrows+1]: i-structure index
4456     */
4457     /*-------------------------------------------*/
4458     nrows       = len_si[proc]/2 - 1;
4459     buf_si_i    = buf_si + nrows+1;
4460     buf_si[0]   = nrows;
4461     buf_si_i[0] = 0;
4462     nrows       = 0;
4463     for (i=owners[proc]; i<owners[proc+1]; i++) {
4464       anzi = ai[i+1] - ai[i];
4465       if (anzi) {
4466         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4467         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4468         nrows++;
4469       }
4470     }
4471     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4472     k++;
4473     buf_si += len_si[proc];
4474   }
4475 
4476   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4477   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4478 
4479   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4480   for (i=0; i<merge->nrecv; i++) {
4481     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4482   }
4483 
4484   ierr = PetscFree(len_si);CHKERRQ(ierr);
4485   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4486   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4487   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4488   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4489   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4490   ierr = PetscFree(status);CHKERRQ(ierr);
4491 
4492   /* compute a local seq matrix in each processor */
4493   /*----------------------------------------------*/
4494   /* allocate bi array and free space for accumulating nonzero column info */
4495   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4496   bi[0] = 0;
4497 
4498   /* create and initialize a linked list */
4499   nlnk = N+1;
4500   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4501 
4502   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4503   len  = ai[owners[rank+1]] - ai[owners[rank]];
4504   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4505 
4506   current_space = free_space;
4507 
4508   /* determine symbolic info for each local row */
4509   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4510 
4511   for (k=0; k<merge->nrecv; k++) {
4512     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4513     nrows       = *buf_ri_k[k];
4514     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4515     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4516   }
4517 
4518   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4519   len  = 0;
4520   for (i=0; i<m; i++) {
4521     bnzi = 0;
4522     /* add local non-zero cols of this proc's seqmat into lnk */
4523     arow  = owners[rank] + i;
4524     anzi  = ai[arow+1] - ai[arow];
4525     aj    = a->j + ai[arow];
4526     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4527     bnzi += nlnk;
4528     /* add received col data into lnk */
4529     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4530       if (i == *nextrow[k]) { /* i-th row */
4531         anzi  = *(nextai[k]+1) - *nextai[k];
4532         aj    = buf_rj[k] + *nextai[k];
4533         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4534         bnzi += nlnk;
4535         nextrow[k]++; nextai[k]++;
4536       }
4537     }
4538     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4539 
4540     /* if free space is not available, make more free space */
4541     if (current_space->local_remaining<bnzi) {
4542       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4543       nspacedouble++;
4544     }
4545     /* copy data into free space, then initialize lnk */
4546     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4547     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4548 
4549     current_space->array           += bnzi;
4550     current_space->local_used      += bnzi;
4551     current_space->local_remaining -= bnzi;
4552 
4553     bi[i+1] = bi[i] + bnzi;
4554   }
4555 
4556   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4557 
4558   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4559   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4560   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4561 
4562   /* create symbolic parallel matrix B_mpi */
4563   /*---------------------------------------*/
4564   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4565   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4566   if (n==PETSC_DECIDE) {
4567     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4568   } else {
4569     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4570   }
4571   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4572   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4573   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4574   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4575   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4576 
4577   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4578   B_mpi->assembled    = PETSC_FALSE;
4579   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4580   merge->bi           = bi;
4581   merge->bj           = bj;
4582   merge->buf_ri       = buf_ri;
4583   merge->buf_rj       = buf_rj;
4584   merge->coi          = NULL;
4585   merge->coj          = NULL;
4586   merge->owners_co    = NULL;
4587 
4588   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4589 
4590   /* attach the supporting struct to B_mpi for reuse */
4591   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4592   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4593   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4594   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4595   *mpimat = B_mpi;
4596 
4597   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4598   PetscFunctionReturn(0);
4599 }
4600 
4601 /*@C
4602       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4603                  matrices from each processor
4604 
4605     Collective on MPI_Comm
4606 
4607    Input Parameters:
4608 +    comm - the communicators the parallel matrix will live on
4609 .    seqmat - the input sequential matrices
4610 .    m - number of local rows (or PETSC_DECIDE)
4611 .    n - number of local columns (or PETSC_DECIDE)
4612 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4613 
4614    Output Parameter:
4615 .    mpimat - the parallel matrix generated
4616 
4617     Level: advanced
4618 
4619    Notes:
4620      The dimensions of the sequential matrix in each processor MUST be the same.
4621      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4622      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4623 @*/
4624 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4625 {
4626   PetscErrorCode ierr;
4627   PetscMPIInt    size;
4628 
4629   PetscFunctionBegin;
4630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4631   if (size == 1) {
4632     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4633     if (scall == MAT_INITIAL_MATRIX) {
4634       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4635     } else {
4636       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4637     }
4638     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4639     PetscFunctionReturn(0);
4640   }
4641   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4642   if (scall == MAT_INITIAL_MATRIX) {
4643     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4644   }
4645   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4646   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4647   PetscFunctionReturn(0);
4648 }
4649 
4650 /*@
4651      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4652           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4653           with MatGetSize()
4654 
4655     Not Collective
4656 
4657    Input Parameters:
4658 +    A - the matrix
4659 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4660 
4661    Output Parameter:
4662 .    A_loc - the local sequential matrix generated
4663 
4664     Level: developer
4665 
4666 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4667 
4668 @*/
4669 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4670 {
4671   PetscErrorCode ierr;
4672   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4673   Mat_SeqAIJ     *mat,*a,*b;
4674   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4675   MatScalar      *aa,*ba,*cam;
4676   PetscScalar    *ca;
4677   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4678   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4679   PetscBool      match;
4680   MPI_Comm       comm;
4681   PetscMPIInt    size;
4682 
4683   PetscFunctionBegin;
4684   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4685   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4686   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4688   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4689 
4690   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4691   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4692   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4693   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4694   aa = a->a; ba = b->a;
4695   if (scall == MAT_INITIAL_MATRIX) {
4696     if (size == 1) {
4697       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4698       PetscFunctionReturn(0);
4699     }
4700 
4701     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4702     ci[0] = 0;
4703     for (i=0; i<am; i++) {
4704       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4705     }
4706     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4707     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4708     k    = 0;
4709     for (i=0; i<am; i++) {
4710       ncols_o = bi[i+1] - bi[i];
4711       ncols_d = ai[i+1] - ai[i];
4712       /* off-diagonal portion of A */
4713       for (jo=0; jo<ncols_o; jo++) {
4714         col = cmap[*bj];
4715         if (col >= cstart) break;
4716         cj[k]   = col; bj++;
4717         ca[k++] = *ba++;
4718       }
4719       /* diagonal portion of A */
4720       for (j=0; j<ncols_d; j++) {
4721         cj[k]   = cstart + *aj++;
4722         ca[k++] = *aa++;
4723       }
4724       /* off-diagonal portion of A */
4725       for (j=jo; j<ncols_o; j++) {
4726         cj[k]   = cmap[*bj++];
4727         ca[k++] = *ba++;
4728       }
4729     }
4730     /* put together the new matrix */
4731     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4732     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4733     /* Since these are PETSc arrays, change flags to free them as necessary. */
4734     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4735     mat->free_a  = PETSC_TRUE;
4736     mat->free_ij = PETSC_TRUE;
4737     mat->nonew   = 0;
4738   } else if (scall == MAT_REUSE_MATRIX) {
4739     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4740     ci = mat->i; cj = mat->j; cam = mat->a;
4741     for (i=0; i<am; i++) {
4742       /* off-diagonal portion of A */
4743       ncols_o = bi[i+1] - bi[i];
4744       for (jo=0; jo<ncols_o; jo++) {
4745         col = cmap[*bj];
4746         if (col >= cstart) break;
4747         *cam++ = *ba++; bj++;
4748       }
4749       /* diagonal portion of A */
4750       ncols_d = ai[i+1] - ai[i];
4751       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4752       /* off-diagonal portion of A */
4753       for (j=jo; j<ncols_o; j++) {
4754         *cam++ = *ba++; bj++;
4755       }
4756     }
4757   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4758   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4759   PetscFunctionReturn(0);
4760 }
4761 
4762 /*@C
4763      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4764 
4765     Not Collective
4766 
4767    Input Parameters:
4768 +    A - the matrix
4769 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4770 -    row, col - index sets of rows and columns to extract (or NULL)
4771 
4772    Output Parameter:
4773 .    A_loc - the local sequential matrix generated
4774 
4775     Level: developer
4776 
4777 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4778 
4779 @*/
4780 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4781 {
4782   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4783   PetscErrorCode ierr;
4784   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4785   IS             isrowa,iscola;
4786   Mat            *aloc;
4787   PetscBool      match;
4788 
4789   PetscFunctionBegin;
4790   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4791   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4792   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4793   if (!row) {
4794     start = A->rmap->rstart; end = A->rmap->rend;
4795     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4796   } else {
4797     isrowa = *row;
4798   }
4799   if (!col) {
4800     start = A->cmap->rstart;
4801     cmap  = a->garray;
4802     nzA   = a->A->cmap->n;
4803     nzB   = a->B->cmap->n;
4804     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4805     ncols = 0;
4806     for (i=0; i<nzB; i++) {
4807       if (cmap[i] < start) idx[ncols++] = cmap[i];
4808       else break;
4809     }
4810     imark = i;
4811     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4812     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4813     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4814   } else {
4815     iscola = *col;
4816   }
4817   if (scall != MAT_INITIAL_MATRIX) {
4818     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4819     aloc[0] = *A_loc;
4820   }
4821   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4822   *A_loc = aloc[0];
4823   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4824   if (!row) {
4825     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4826   }
4827   if (!col) {
4828     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4829   }
4830   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4831   PetscFunctionReturn(0);
4832 }
4833 
4834 /*@C
4835     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4836 
4837     Collective on Mat
4838 
4839    Input Parameters:
4840 +    A,B - the matrices in mpiaij format
4841 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4842 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4843 
4844    Output Parameter:
4845 +    rowb, colb - index sets of rows and columns of B to extract
4846 -    B_seq - the sequential matrix generated
4847 
4848     Level: developer
4849 
4850 @*/
4851 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4852 {
4853   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4854   PetscErrorCode ierr;
4855   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4856   IS             isrowb,iscolb;
4857   Mat            *bseq=NULL;
4858 
4859   PetscFunctionBegin;
4860   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4861     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4862   }
4863   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4864 
4865   if (scall == MAT_INITIAL_MATRIX) {
4866     start = A->cmap->rstart;
4867     cmap  = a->garray;
4868     nzA   = a->A->cmap->n;
4869     nzB   = a->B->cmap->n;
4870     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4871     ncols = 0;
4872     for (i=0; i<nzB; i++) {  /* row < local row index */
4873       if (cmap[i] < start) idx[ncols++] = cmap[i];
4874       else break;
4875     }
4876     imark = i;
4877     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4878     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4879     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4880     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4881   } else {
4882     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4883     isrowb  = *rowb; iscolb = *colb;
4884     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4885     bseq[0] = *B_seq;
4886   }
4887   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4888   *B_seq = bseq[0];
4889   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4890   if (!rowb) {
4891     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4892   } else {
4893     *rowb = isrowb;
4894   }
4895   if (!colb) {
4896     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4897   } else {
4898     *colb = iscolb;
4899   }
4900   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4901   PetscFunctionReturn(0);
4902 }
4903 
4904 /*
4905     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4906     of the OFF-DIAGONAL portion of local A
4907 
4908     Collective on Mat
4909 
4910    Input Parameters:
4911 +    A,B - the matrices in mpiaij format
4912 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4913 
4914    Output Parameter:
4915 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4916 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4917 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4918 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4919 
4920     Level: developer
4921 
4922 */
4923 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4924 {
4925   VecScatter_MPI_General *gen_to,*gen_from;
4926   PetscErrorCode         ierr;
4927   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4928   Mat_SeqAIJ             *b_oth;
4929   VecScatter             ctx =a->Mvctx;
4930   MPI_Comm               comm;
4931   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4932   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4933   PetscInt               *rvalues,*svalues;
4934   MatScalar              *b_otha,*bufa,*bufA;
4935   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4936   MPI_Request            *rwaits = NULL,*swaits = NULL;
4937   MPI_Status             *sstatus,rstatus;
4938   PetscMPIInt            jj,size;
4939   PetscInt               *cols,sbs,rbs;
4940   PetscScalar            *vals;
4941 
4942   PetscFunctionBegin;
4943   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4944   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4945 
4946   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4947     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4948   }
4949   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4950   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4951 
4952   if (size == 1) {
4953     startsj_s = NULL;
4954     bufa_ptr  = NULL;
4955     *B_oth    = NULL;
4956     PetscFunctionReturn(0);
4957   }
4958 
4959   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4960   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4961   nrecvs   = gen_from->n;
4962   nsends   = gen_to->n;
4963 
4964   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4965   srow    = gen_to->indices;    /* local row index to be sent */
4966   sstarts = gen_to->starts;
4967   sprocs  = gen_to->procs;
4968   sstatus = gen_to->sstatus;
4969   sbs     = gen_to->bs;
4970   rstarts = gen_from->starts;
4971   rprocs  = gen_from->procs;
4972   rbs     = gen_from->bs;
4973 
4974   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4975   if (scall == MAT_INITIAL_MATRIX) {
4976     /* i-array */
4977     /*---------*/
4978     /*  post receives */
4979     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
4980     for (i=0; i<nrecvs; i++) {
4981       rowlen = rvalues + rstarts[i]*rbs;
4982       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4983       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4984     }
4985 
4986     /* pack the outgoing message */
4987     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4988 
4989     sstartsj[0] = 0;
4990     rstartsj[0] = 0;
4991     len         = 0; /* total length of j or a array to be sent */
4992     k           = 0;
4993     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
4994     for (i=0; i<nsends; i++) {
4995       rowlen = svalues + sstarts[i]*sbs;
4996       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4997       for (j=0; j<nrows; j++) {
4998         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4999         for (l=0; l<sbs; l++) {
5000           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5001 
5002           rowlen[j*sbs+l] = ncols;
5003 
5004           len += ncols;
5005           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5006         }
5007         k++;
5008       }
5009       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5010 
5011       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5012     }
5013     /* recvs and sends of i-array are completed */
5014     i = nrecvs;
5015     while (i--) {
5016       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5017     }
5018     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5019     ierr = PetscFree(svalues);CHKERRQ(ierr);
5020 
5021     /* allocate buffers for sending j and a arrays */
5022     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5023     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5024 
5025     /* create i-array of B_oth */
5026     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5027 
5028     b_othi[0] = 0;
5029     len       = 0; /* total length of j or a array to be received */
5030     k         = 0;
5031     for (i=0; i<nrecvs; i++) {
5032       rowlen = rvalues + rstarts[i]*rbs;
5033       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5034       for (j=0; j<nrows; j++) {
5035         b_othi[k+1] = b_othi[k] + rowlen[j];
5036         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5037         k++;
5038       }
5039       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5040     }
5041     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5042 
5043     /* allocate space for j and a arrrays of B_oth */
5044     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5045     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5046 
5047     /* j-array */
5048     /*---------*/
5049     /*  post receives of j-array */
5050     for (i=0; i<nrecvs; i++) {
5051       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5052       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5053     }
5054 
5055     /* pack the outgoing message j-array */
5056     k = 0;
5057     for (i=0; i<nsends; i++) {
5058       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5059       bufJ  = bufj+sstartsj[i];
5060       for (j=0; j<nrows; j++) {
5061         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5062         for (ll=0; ll<sbs; ll++) {
5063           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5064           for (l=0; l<ncols; l++) {
5065             *bufJ++ = cols[l];
5066           }
5067           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5068         }
5069       }
5070       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5071     }
5072 
5073     /* recvs and sends of j-array are completed */
5074     i = nrecvs;
5075     while (i--) {
5076       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5077     }
5078     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5079   } else if (scall == MAT_REUSE_MATRIX) {
5080     sstartsj = *startsj_s;
5081     rstartsj = *startsj_r;
5082     bufa     = *bufa_ptr;
5083     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5084     b_otha   = b_oth->a;
5085   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5086 
5087   /* a-array */
5088   /*---------*/
5089   /*  post receives of a-array */
5090   for (i=0; i<nrecvs; i++) {
5091     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5092     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5093   }
5094 
5095   /* pack the outgoing message a-array */
5096   k = 0;
5097   for (i=0; i<nsends; i++) {
5098     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5099     bufA  = bufa+sstartsj[i];
5100     for (j=0; j<nrows; j++) {
5101       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5102       for (ll=0; ll<sbs; ll++) {
5103         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5104         for (l=0; l<ncols; l++) {
5105           *bufA++ = vals[l];
5106         }
5107         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5108       }
5109     }
5110     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5111   }
5112   /* recvs and sends of a-array are completed */
5113   i = nrecvs;
5114   while (i--) {
5115     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5116   }
5117   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5118   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5119 
5120   if (scall == MAT_INITIAL_MATRIX) {
5121     /* put together the new matrix */
5122     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5123 
5124     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5125     /* Since these are PETSc arrays, change flags to free them as necessary. */
5126     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5127     b_oth->free_a  = PETSC_TRUE;
5128     b_oth->free_ij = PETSC_TRUE;
5129     b_oth->nonew   = 0;
5130 
5131     ierr = PetscFree(bufj);CHKERRQ(ierr);
5132     if (!startsj_s || !bufa_ptr) {
5133       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5134       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5135     } else {
5136       *startsj_s = sstartsj;
5137       *startsj_r = rstartsj;
5138       *bufa_ptr  = bufa;
5139     }
5140   }
5141   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5142   PetscFunctionReturn(0);
5143 }
5144 
5145 /*@C
5146   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5147 
5148   Not Collective
5149 
5150   Input Parameters:
5151 . A - The matrix in mpiaij format
5152 
5153   Output Parameter:
5154 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5155 . colmap - A map from global column index to local index into lvec
5156 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5157 
5158   Level: developer
5159 
5160 @*/
5161 #if defined(PETSC_USE_CTABLE)
5162 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5163 #else
5164 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5165 #endif
5166 {
5167   Mat_MPIAIJ *a;
5168 
5169   PetscFunctionBegin;
5170   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5171   PetscValidPointer(lvec, 2);
5172   PetscValidPointer(colmap, 3);
5173   PetscValidPointer(multScatter, 4);
5174   a = (Mat_MPIAIJ*) A->data;
5175   if (lvec) *lvec = a->lvec;
5176   if (colmap) *colmap = a->colmap;
5177   if (multScatter) *multScatter = a->Mvctx;
5178   PetscFunctionReturn(0);
5179 }
5180 
5181 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5182 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5183 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5184 #if defined(PETSC_HAVE_ELEMENTAL)
5185 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5186 #endif
5187 #if defined(PETSC_HAVE_HYPRE)
5188 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5189 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5190 #endif
5191 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5192 
5193 /*
5194     Computes (B'*A')' since computing B*A directly is untenable
5195 
5196                n                       p                          p
5197         (              )       (              )         (                  )
5198       m (      A       )  *  n (       B      )   =   m (         C        )
5199         (              )       (              )         (                  )
5200 
5201 */
5202 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5203 {
5204   PetscErrorCode ierr;
5205   Mat            At,Bt,Ct;
5206 
5207   PetscFunctionBegin;
5208   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5209   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5210   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5211   ierr = MatDestroy(&At);CHKERRQ(ierr);
5212   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5213   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5214   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5215   PetscFunctionReturn(0);
5216 }
5217 
5218 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5219 {
5220   PetscErrorCode ierr;
5221   PetscInt       m=A->rmap->n,n=B->cmap->n;
5222   Mat            Cmat;
5223 
5224   PetscFunctionBegin;
5225   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5226   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5227   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5228   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5229   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5230   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5231   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5232   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5233 
5234   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5235 
5236   *C = Cmat;
5237   PetscFunctionReturn(0);
5238 }
5239 
5240 /* ----------------------------------------------------------------*/
5241 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5242 {
5243   PetscErrorCode ierr;
5244 
5245   PetscFunctionBegin;
5246   if (scall == MAT_INITIAL_MATRIX) {
5247     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5248     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5249     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5250   }
5251   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5252   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5253   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5254   PetscFunctionReturn(0);
5255 }
5256 
5257 /*MC
5258    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5259 
5260    Options Database Keys:
5261 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5262 
5263   Level: beginner
5264 
5265 .seealso: MatCreateAIJ()
5266 M*/
5267 
5268 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5269 {
5270   Mat_MPIAIJ     *b;
5271   PetscErrorCode ierr;
5272   PetscMPIInt    size;
5273 
5274   PetscFunctionBegin;
5275   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5276 
5277   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5278   B->data       = (void*)b;
5279   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5280   B->assembled  = PETSC_FALSE;
5281   B->insertmode = NOT_SET_VALUES;
5282   b->size       = size;
5283 
5284   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5285 
5286   /* build cache for off array entries formed */
5287   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5288 
5289   b->donotstash  = PETSC_FALSE;
5290   b->colmap      = 0;
5291   b->garray      = 0;
5292   b->roworiented = PETSC_TRUE;
5293 
5294   /* stuff used for matrix vector multiply */
5295   b->lvec  = NULL;
5296   b->Mvctx = NULL;
5297 
5298   /* stuff for MatGetRow() */
5299   b->rowindices   = 0;
5300   b->rowvalues    = 0;
5301   b->getrowactive = PETSC_FALSE;
5302 
5303   /* flexible pointer used in CUSP/CUSPARSE classes */
5304   b->spptr = NULL;
5305 
5306   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5307   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5308   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5309   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5310   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5311   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5312   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5313   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5314   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5315   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5316 #if defined(PETSC_HAVE_ELEMENTAL)
5317   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5318 #endif
5319 #if defined(PETSC_HAVE_HYPRE)
5320   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5321 #endif
5322   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5323   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5324   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5325   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5326 #if defined(PETSC_HAVE_HYPRE)
5327   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5328 #endif
5329   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5330   PetscFunctionReturn(0);
5331 }
5332 
5333 /*@C
5334      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5335          and "off-diagonal" part of the matrix in CSR format.
5336 
5337    Collective on MPI_Comm
5338 
5339    Input Parameters:
5340 +  comm - MPI communicator
5341 .  m - number of local rows (Cannot be PETSC_DECIDE)
5342 .  n - This value should be the same as the local size used in creating the
5343        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5344        calculated if N is given) For square matrices n is almost always m.
5345 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5346 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5347 .   i - row indices for "diagonal" portion of matrix
5348 .   j - column indices
5349 .   a - matrix values
5350 .   oi - row indices for "off-diagonal" portion of matrix
5351 .   oj - column indices
5352 -   oa - matrix values
5353 
5354    Output Parameter:
5355 .   mat - the matrix
5356 
5357    Level: advanced
5358 
5359    Notes:
5360        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5361        must free the arrays once the matrix has been destroyed and not before.
5362 
5363        The i and j indices are 0 based
5364 
5365        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5366 
5367        This sets local rows and cannot be used to set off-processor values.
5368 
5369        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5370        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5371        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5372        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5373        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5374        communication if it is known that only local entries will be set.
5375 
5376 .keywords: matrix, aij, compressed row, sparse, parallel
5377 
5378 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5379           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5380 @*/
5381 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5382 {
5383   PetscErrorCode ierr;
5384   Mat_MPIAIJ     *maij;
5385 
5386   PetscFunctionBegin;
5387   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5388   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5389   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5390   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5391   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5392   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5393   maij = (Mat_MPIAIJ*) (*mat)->data;
5394 
5395   (*mat)->preallocated = PETSC_TRUE;
5396 
5397   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5398   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5399 
5400   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5401   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5402 
5403   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5404   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5405   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5406   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5407 
5408   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5409   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5410   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5411   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5412   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5413   PetscFunctionReturn(0);
5414 }
5415 
5416 /*
5417     Special version for direct calls from Fortran
5418 */
5419 #include <petsc/private/fortranimpl.h>
5420 
5421 /* Change these macros so can be used in void function */
5422 #undef CHKERRQ
5423 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5424 #undef SETERRQ2
5425 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5426 #undef SETERRQ3
5427 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5428 #undef SETERRQ
5429 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5430 
5431 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5432 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5433 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5434 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5435 #else
5436 #endif
5437 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5438 {
5439   Mat            mat  = *mmat;
5440   PetscInt       m    = *mm, n = *mn;
5441   InsertMode     addv = *maddv;
5442   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5443   PetscScalar    value;
5444   PetscErrorCode ierr;
5445 
5446   MatCheckPreallocated(mat,1);
5447   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5448 
5449 #if defined(PETSC_USE_DEBUG)
5450   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5451 #endif
5452   {
5453     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5454     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5455     PetscBool roworiented = aij->roworiented;
5456 
5457     /* Some Variables required in the macro */
5458     Mat        A                 = aij->A;
5459     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5460     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5461     MatScalar  *aa               = a->a;
5462     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5463     Mat        B                 = aij->B;
5464     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5465     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5466     MatScalar  *ba               = b->a;
5467 
5468     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5469     PetscInt  nonew = a->nonew;
5470     MatScalar *ap1,*ap2;
5471 
5472     PetscFunctionBegin;
5473     for (i=0; i<m; i++) {
5474       if (im[i] < 0) continue;
5475 #if defined(PETSC_USE_DEBUG)
5476       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5477 #endif
5478       if (im[i] >= rstart && im[i] < rend) {
5479         row      = im[i] - rstart;
5480         lastcol1 = -1;
5481         rp1      = aj + ai[row];
5482         ap1      = aa + ai[row];
5483         rmax1    = aimax[row];
5484         nrow1    = ailen[row];
5485         low1     = 0;
5486         high1    = nrow1;
5487         lastcol2 = -1;
5488         rp2      = bj + bi[row];
5489         ap2      = ba + bi[row];
5490         rmax2    = bimax[row];
5491         nrow2    = bilen[row];
5492         low2     = 0;
5493         high2    = nrow2;
5494 
5495         for (j=0; j<n; j++) {
5496           if (roworiented) value = v[i*n+j];
5497           else value = v[i+j*m];
5498           if (in[j] >= cstart && in[j] < cend) {
5499             col = in[j] - cstart;
5500             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5501             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5502           } else if (in[j] < 0) continue;
5503 #if defined(PETSC_USE_DEBUG)
5504           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5505 #endif
5506           else {
5507             if (mat->was_assembled) {
5508               if (!aij->colmap) {
5509                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5510               }
5511 #if defined(PETSC_USE_CTABLE)
5512               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5513               col--;
5514 #else
5515               col = aij->colmap[in[j]] - 1;
5516 #endif
5517               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5518               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5519                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5520                 col  =  in[j];
5521                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5522                 B     = aij->B;
5523                 b     = (Mat_SeqAIJ*)B->data;
5524                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5525                 rp2   = bj + bi[row];
5526                 ap2   = ba + bi[row];
5527                 rmax2 = bimax[row];
5528                 nrow2 = bilen[row];
5529                 low2  = 0;
5530                 high2 = nrow2;
5531                 bm    = aij->B->rmap->n;
5532                 ba    = b->a;
5533               }
5534             } else col = in[j];
5535             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5536           }
5537         }
5538       } else if (!aij->donotstash) {
5539         if (roworiented) {
5540           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5541         } else {
5542           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5543         }
5544       }
5545     }
5546   }
5547   PetscFunctionReturnVoid();
5548 }
5549 
5550