xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8b7cb0e67edc0ad3c573fa4d3743f885f6250459)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also
   automatically switches over to using inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
27 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
62 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
63 {
64   PetscErrorCode  ierr;
65   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
66   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
67   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
68   const PetscInt  *ia,*ib;
69   const MatScalar *aa,*bb;
70   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
71   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
72 
73   PetscFunctionBegin;
74   *keptrows = 0;
75   ia        = a->i;
76   ib        = b->i;
77   for (i=0; i<m; i++) {
78     na = ia[i+1] - ia[i];
79     nb = ib[i+1] - ib[i];
80     if (!na && !nb) {
81       cnt++;
82       goto ok1;
83     }
84     aa = a->a + ia[i];
85     for (j=0; j<na; j++) {
86       if (aa[j] != 0.0) goto ok1;
87     }
88     bb = b->a + ib[i];
89     for (j=0; j <nb; j++) {
90       if (bb[j] != 0.0) goto ok1;
91     }
92     cnt++;
93 ok1:;
94   }
95   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
96   if (!n0rows) PetscFunctionReturn(0);
97   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
98   cnt  = 0;
99   for (i=0; i<m; i++) {
100     na = ia[i+1] - ia[i];
101     nb = ib[i+1] - ib[i];
102     if (!na && !nb) continue;
103     aa = a->a + ia[i];
104     for (j=0; j<na;j++) {
105       if (aa[j] != 0.0) {
106         rows[cnt++] = rstart + i;
107         goto ok2;
108       }
109     }
110     bb = b->a + ib[i];
111     for (j=0; j<nb; j++) {
112       if (bb[j] != 0.0) {
113         rows[cnt++] = rstart + i;
114         goto ok2;
115       }
116     }
117 ok2:;
118   }
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
125 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
126 {
127   PetscErrorCode    ierr;
128   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
129 
130   PetscFunctionBegin;
131   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
132     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
133   } else {
134     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
135   }
136   PetscFunctionReturn(0);
137 }
138 
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
142 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
143 {
144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
145   PetscErrorCode ierr;
146   PetscInt       i,rstart,nrows,*rows;
147 
148   PetscFunctionBegin;
149   *zrows = NULL;
150   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
151   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
152   for (i=0; i<nrows; i++) rows[i] += rstart;
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
159 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
160 {
161   PetscErrorCode ierr;
162   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
163   PetscInt       i,n,*garray = aij->garray;
164   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
165   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
166   PetscReal      *work;
167 
168   PetscFunctionBegin;
169   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
170   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
171   if (type == NORM_2) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
177     }
178   } else if (type == NORM_1) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
184     }
185   } else if (type == NORM_INFINITY) {
186     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
187       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
188     }
189     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
190       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
191     }
192 
193   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
194   if (type == NORM_INFINITY) {
195     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
196   } else {
197     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
198   }
199   ierr = PetscFree(work);CHKERRQ(ierr);
200   if (type == NORM_2) {
201     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNCT__
207 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
208 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
209 {
210   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
211   IS              sis,gis;
212   PetscErrorCode  ierr;
213   const PetscInt  *isis,*igis;
214   PetscInt        n,*iis,nsis,ngis,rstart,i;
215 
216   PetscFunctionBegin;
217   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
218   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
219   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
220   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
221   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
223 
224   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
225   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
226   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
227   n    = ngis + nsis;
228   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
230   for (i=0; i<n; i++) iis[i] += rstart;
231   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
232 
233   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
234   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
235   ierr = ISDestroy(&sis);CHKERRQ(ierr);
236   ierr = ISDestroy(&gis);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN

    Input Parameters:
      comm  - communicator over which the matrix is distributed
      gmat  - the matrix to distribute; its type is checked (must be MATSEQAIJ) and its data
              is read only on rank 0 of comm
      m     - number of local rows (and local columns) this process is to own
      reuse - MAT_INITIAL_MATRIX creates *inmat; any other value refills the numerical values
              of an existing *inmat

    Output Parameter:
      inmat - the distributed matrix

    With MAT_INITIAL_MATRIX rank 0 sends, in this order, the row lengths, the number of entries
    plus the column indices, and the numerical values to every other rank; each rank then
    preallocates and inserts its rows with MatSetValues().  The per-row count of entries that lie
    to the left of the owned column range is kept in the ld field of the Mat_MPIAIJ so that a
    later reuse call only needs to ship numerical values.
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* the type of gmat is only verified on rank 0 */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* every rank queries gmat, but the broadcast makes rank 0's block sizes the ones used */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every rank's m into rowners[1..size]; the prefix sum below turns it into row offsets */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i]: entries of row i left of the owned column range; olens[i]: entries outside it */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 inserts its own rows straight from gmat's arrays (nothing to free later) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    /* dlens currently holds full row lengths; subtract the off-diagonal part to get diagonal counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths, which is what the insertion loop below needs */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* only non-root ranks allocated receive buffers */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* keep the left-of-diagonal counts on the matrix; the reuse branch reads them back */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced while copying below, so remember the base pointer for PetscFree() */
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /*
       The incoming values of a row are consumed as: ld[i] values for B (left part), then the
       A values of the row, then the remaining B values (right part).  The right part of row i-1
       and the left part of row i are adjacent in both the source and in B, so the loop copies
       them with a single PetscMemcpy().
    */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    /* step back to the last local row to copy its trailing off-diagonal values */
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
411 
412 /*
413   Local utility routine that creates a mapping from the global column
414 number to the local number in the off-diagonal part of the local
415 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
416 a slightly higher hash table cost; without it it is not scalable (each processor
417 has an order N integer array but is fast to acess.
418 */
419 #undef __FUNCT__
420 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
421 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
422 {
423   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
424   PetscErrorCode ierr;
425   PetscInt       n = aij->B->cmap->n,i;
426 
427   PetscFunctionBegin;
428   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
429 #if defined(PETSC_USE_CTABLE)
430   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
431   for (i=0; i<n; i++) {
432     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
433   }
434 #else
435   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
436   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
437   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
438 #endif
439   PetscFunctionReturn(0);
440 }
441 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into row `row`, local column `col`,
   of the diagonal piece.  This is a statement macro, not a function: besides its arguments it
   reads and writes variables that must exist in the expanding scope, namely
   rp1/ap1 (column indices / values of the current row), nrow1, rmax1, low1, high1, lastcol1,
   t, _i, ii, N, nonew, ignorezeroentries, A, a, am, aa, ai, aj, aimax, ailen and the label
   a_noinsert (so it can be expanded only once per function).

   Steps, as written below:
     1. narrow [low1,high1) using the previously searched column (lastcol1), bisect until the
        window is at most 5 wide, then scan linearly;
     2. if the column is already present, add to or overwrite the value according to addv;
     3. otherwise skip the insertion when the value is zero and ignorezeroentries is set, or when
        nonew == 1; raise an error naming (orow,ocol) when nonew == -1;
     4. otherwise call MatSeqXAIJReallocateAIJ(), shift the later entries of the row up by one,
        store the new column/value, and increment a->nz and A->nonzerostate;
     5. in all non-error cases write the row length back to ailen[row].
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
476 
477 
/*
   MatSetValues_SeqAIJ_B_Private - Counterpart of the diagonal-piece insertion macro for the
   off-diagonal piece.  It is a statement macro that relies on variables of the expanding scope:
   rp2/ap2 (column indices / values of the current row), nrow2, rmax2, low2, high2, lastcol2,
   t, _i, ii, N, nonew, ignorezeroentries, B, b, bm, ba, bi, bj, bimax, bilen and the label
   b_noinsert (so it can be expanded only once per function).

   The column is searched with a bisection narrowed by lastcol2 followed by a linear scan.  An
   existing entry is added to or overwritten according to addv.  A missing entry is skipped when
   the value is zero and ignorezeroentries is set or when nonew == 1, is an error naming
   (orow,ocol) when nonew == -1, and is otherwise inserted after MatSeqXAIJReallocateAIJ(), with
   b->nz and B->nonzerostate incremented.  In all non-error cases bilen[row] is updated.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
515 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
516 {
517   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
518   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
519   PetscErrorCode ierr;
520   PetscInt       l,*garray = mat->garray,diag;
521 
522   PetscFunctionBegin;
523   /* code only works for square matrices A */
524 
525   /* find size of row to the left of the diagonal part */
526   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
527   row  = row - diag;
528   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
529     if (garray[b->j[b->i[row]+l]] > diag) break;
530   }
531   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
532 
533   /* diagonal part */
534   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
535 
536   /* right of diagonal part */
537   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values given by global indices.

   Input Parameters:
+  mat  - the MPIAIJ matrix
.  m,im - number of rows and their global indices (negative indices are skipped)
.  n,in - number of columns and their global indices (negative indices are skipped)
.  v    - the values; read as v[i*n+j] when the matrix is row oriented, v[i+j*m] otherwise
-  addv - ADD_VALUES adds to existing entries, any other mode overwrites them

   Locally owned rows are written directly: columns inside the owned column range go to the
   diagonal piece, all other columns go to the off-diagonal piece.  Rows owned by another
   process are placed in the stash (unless stashing is turned off), or produce an error when
   the matrix has nooffprocentries set.

   The local variables declared under "Some Variables required in the macro" are used by name
   inside MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private(); do not rename them.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by both insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* adding an exact zero changes nothing, so it is dropped when zero entries are ignored */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column inside the owned column range: diagonal piece, local column numbering */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            /* after a previous assembly the global column is translated through colmap
               (built on demand); a missing column yields col < 0 */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal column and new nonzeros are allowed: disassemble and
                 continue with the global column index */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* row owned by another process */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatGetValues_MPIAIJ"
651 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   PetscErrorCode ierr;
655   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
656   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
657 
658   PetscFunctionBegin;
659   for (i=0; i<m; i++) {
660     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
661     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
662     if (idxm[i] >= rstart && idxm[i] < rend) {
663       row = idxm[i] - rstart;
664       for (j=0; j<n; j++) {
665         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
666         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
667         if (idxn[j] >= cstart && idxn[j] < cend) {
668           col  = idxn[j] - cstart;
669           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
670         } else {
671           if (!aij->colmap) {
672             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
673           }
674 #if defined(PETSC_USE_CTABLE)
675           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
676           col--;
677 #else
678           col = aij->colmap[idxn[j]] - 1;
679 #endif
680           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
681           else {
682             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
683           }
684         }
685       }
686     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
695 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
696 {
697   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
698   PetscErrorCode ierr;
699   PetscInt       nstash,reallocs;
700 
701   PetscFunctionBegin;
702   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
703 
704   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
705   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
706   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
    /*
       MatAssemblyEnd_MPIAIJ - Completes the assembly of an MPIAIJ matrix.

       Steps, in the order they are executed:
         1. unless stashing is disabled (donotstash / nooffprocentries), drain the stash
            messages, insert the received values one row-run at a time with
            MatSetValues_MPIAIJ(), and end the stash scatter;
         2. assemble the block aij->A;
         3. unless aij->B has 'nonew' set, reduce mat->was_assembled over the communicator
            (product) and call MatDisAssemble_MPIAIJ() when this process was assembled but
            the product is zero;
         4. on a final assembly of a not-yet-assembled matrix call MatSetUpMultiply_MPIAIJ();
         5. switch inodes off on aij->B and assemble it;
         6. free the row work arrays, destroy aij->diag, install the multdiagonalblock op
            when aij->A has inodes, and refresh mat->nonzerostate when applicable.

       Input Parameters:
         mat  - the matrix
         mode - assembly type; forwarded to the assemblies of aij->A and aij->B
    */
712 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
713 {
714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
715   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
716   PetscErrorCode ierr;
717   PetscMPIInt    n;
718   PetscInt       i,j,rstart,ncols,flg;
719   PetscInt       *row,*col;
720   PetscBool      other_disassembled;
721   PetscScalar    *val;
722 
723   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
724 
725   PetscFunctionBegin;
726   if (!aij->donotstash && !mat->nooffprocentries) {
727     while (1) {
          /* fetch the next stash message; flg == 0 means there are no more messages */
728       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
729       if (!flg) break;
730 
731       for (i=0; i<n; ) {
732         /* Now identify the consecutive vals belonging to the same row */
733         for (j=i,rstart=row[j]; j<n; j++) {
734           if (row[j] != rstart) break;
735         }
            /* j is one past the last entry of the current row-run (or n), so both branches yield j-i */
736         if (j < n) ncols = j-i;
737         else       ncols = n-i;
738         /* Now assemble all these values with a single function call */
739         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
740 
741         i = j;
742       }
743     }
744     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
745   }
746   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
747   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
748 
749   /* determine if any processor has disassembled, if so we must
750      also disassemble ourselves, in order that we may reassemble. */
751   /*
752      if nonzero structure of submatrix B cannot change then we know that
753      no processor disassembled thus we can skip this stuff
754   */
755   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
        /* product of the was_assembled flags: zero as soon as one process is not assembled */
756     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
757     if (mat->was_assembled && !other_disassembled) {
758       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
759     }
760   }
761   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
762     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
763   }
764   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
765   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
766   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
767 
      /* drop the row work arrays and the cached diagonal vector */
768   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
769 
770   aij->rowvalues = 0;
771 
772   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
773   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
774 
775   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
776   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
        /* global nonzero state = sum over all processes of the two local block states */
777     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
778     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
    /*
       MatZeroEntries_MPIAIJ - Zeros the entries of both sequential blocks (A and B)
       held by the MPIAIJ matrix, A first and then B.
    */
785 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
786 {
787   PetscErrorCode ierr;
788   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
789 
790   PetscFunctionBegin;
791   ierr = MatZeroEntries(mpiaij->A);CHKERRQ(ierr);
792   ierr = MatZeroEntries(mpiaij->B);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatZeroRows_MPIAIJ"
    /*
       MatZeroRows_MPIAIJ - Zeros the given global rows, optionally placing 'diag' on the diagonal.

       Input Parameters:
         A    - the matrix
         N    - number of entries in rows[]
         rows - row indices, mapped to locally owned rows by MatZeroRowsMapLocal_Private()
         diag - value for the diagonal entry of each zeroed row (0.0 leaves the row empty)
         x, b - when both are given, b[row] is set to diag*x[row] for every locally owned zeroed row

       The rows of mat->B are always zeroed first. For the diagonal value there are three cases:
         - diag != 0 and congruent row/column layouts: mat->A handles the diagonal itself;
         - diag != 0 and non-congruent layouts: mat->A is zeroed, then the diagonal entries are
           inserted with MatSetValues() on the parallel matrix followed by a final assembly
           (an error is raised when mat->A has 'nonew' set);
         - diag == 0: mat->A rows are simply zeroed.
       The layout comparison is cached in A->congruentlayouts (-1 meaning "not yet computed").
    */
798 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
799 {
800   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
801   PetscInt      *lrows;
802   PetscInt       r, len;
803   PetscErrorCode ierr;
804 
805   PetscFunctionBegin;
806   /* get locally owned rows */
807   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
808   /* fix right hand side if needed */
809   if (x && b) {
810     const PetscScalar *xx;
811     PetscScalar       *bb;
812 
813     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
814     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
815     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
816     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
817     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
818   }
819   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
820   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
822     PetscBool cong;
823     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
824     if (cong) A->congruentlayouts = 1;
825     else      A->congruentlayouts = 0;
826   }
827   if ((diag != 0.0) && A->congruentlayouts) {
828     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
829   } else if (diag != 0.0) {
830     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
        /* NOTE(review): this check runs after the rows were already zeroed, and the error return skips PetscFree(lrows) below */
831     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
832     for (r = 0; r < len; ++r) {
          /* convert the local row number back to a global index; the same index is used as the column */
833       const PetscInt row = lrows[r] + A->rmap->rstart;
834       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
835     }
836     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   } else {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840   }
841   ierr = PetscFree(lrows);CHKERRQ(ierr);
842 
843   /* only change matrix nonzero state if pattern was allowed to be changed */
844   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
845     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
    /*
       MatZeroRowsColumns_MPIAIJ - Zeros the given global rows and the matching columns.

       Input Parameters:
         A    - the matrix
         N    - number of entries in rows[]
         rows - global row indices (any process may list any row; each must lie in [0, global rows))
         diag - diagonal value, forwarded to MatZeroRowsColumns() on the diagonal block
         x, b - optional; the right hand side b is corrected with the off-diagonal block
                (b[i] -= a_ij * x_j for every zeroed column j) only when BOTH are provided

       Outline:
         1. a PetscSF routes every requested row to its owner, giving the list of owned rows to zero;
         2. the diagonal block l->A is handled by MatZeroRowsColumns();
         3. a 0/1 mask of the zeroed rows is scattered through l->Mvctx so that each process knows
            which columns of the off-diagonal block l->B are to be zeroed;
         4. the zeroed rows of l->B are cleared, then every entry of l->B in a masked column is
            cleared (after correcting b when x and b are given);
         5. the global nonzero state is refreshed unless keepnonzeropattern is set.

       Fixes relative to the previous revision:
         - the local row count 'n' is a PetscInt (it was a PetscMPIInt, which truncates when
           PetscInt is wider than int);
         - the arrays xx/bb are obtained, used and restored under the same condition (x && b);
           previously they were obtained under 'x' but used under 'b', so a call with only one
           of the two vectors touched an uninitialized pointer or a NULL vector.
    */
853 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
854 {
855   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
856   PetscErrorCode    ierr;
857   PetscInt          n = A->rmap->n;
858   PetscInt          i,j,r,m,p = 0,len = 0;
859   PetscInt          *lrows,*owners = A->rmap->range;
860   PetscSFNode       *rrows;
861   PetscSF           sf;
862   const PetscScalar *xx;
863   PetscScalar       *bb,*mask;
864   Vec               xmask,lmask;
865   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
866   const PetscInt    *aj, *ii,*ridx;
867   PetscScalar       *aa;
868 
869   PetscFunctionBegin;
870   /* Create SF where leaves are input rows and roots are owned rows */
871   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
872   for (r = 0; r < n; ++r) lrows[r] = -1;
873   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
874   for (r = 0; r < N; ++r) {
875     const PetscInt idx   = rows[r];
876     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
877     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
878       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
879     }
880     rrows[r].rank  = p;
881     rrows[r].index = rows[r] - owners[p];
882   }
883   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
884   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
885   /* Collect flags for rows to be zeroed */
886   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
887   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
888   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
889   /* Compress and put in row numbers */
890   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
891   /* zero diagonal part of matrix */
892   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
893   /* handle off diagonal part of matrix */
894   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
895   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
896   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
897   for (i=0; i<len; i++) bb[lrows[i]] = 1;
898   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
899   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
      /* the right hand side can only be corrected when both the solution and the rhs are given */
902   if (x && b) {
903     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
906     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
907   }
908   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
909   /* remove zeroed rows of off diagonal matrix */
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
913   }
914   /* loop over all elements of off process part of matrix zeroing removed columns*/
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij->a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (x && b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij->a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (x && b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x && b) {
951     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
952     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
953   }
954   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
955   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
956   ierr = PetscFree(lrows);CHKERRQ(ierr);
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMult_MPIAIJ"
    /*
       MatMult_MPIAIJ - Computes yy = A*xx.

       The local size of xx must equal the local column size of A, otherwise
       PETSC_ERR_ARG_SIZ is raised. The product with the block A is performed between the
       begin and end of the scatter that fills lvec; the block B is then applied to lvec and
       added into yy.
    */
968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
969 {
970   PetscErrorCode ierr;
971   PetscInt       nlocal;
972   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nlocal);CHKERRQ(ierr);
976   if (A->cmap->n != nlocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nlocal);
977   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
    /*
       MatMultDiagonalBlock_MPIAIJ - Forwards to MatMultDiagonalBlock() on the block A
       of the MPIAIJ matrix, with the same two vectors.
    */
986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
987 {
988   PetscErrorCode ierr;
989   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
990 
991   PetscFunctionBegin;
992   ierr = MatMultDiagonalBlock(mpiaij->A,bb,xx);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultAdd_MPIAIJ"
    /*
       MatMultAdd_MPIAIJ - Computes zz = yy + A*xx.

       The multadd of block A (writing zz) runs between the begin and end of the scatter
       that fills lvec; the contribution of block B applied to lvec is then added into zz.
    */
998 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
999 {
1000   PetscErrorCode ierr;
1001   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
     /*
        MatMultTranspose_MPIAIJ - Computes yy = A^T * xx.

        The transpose product with block B is stored in a->lvec and reverse-scattered into yy
        with ADD_VALUES; the transpose product with block A writes yy directly. The relative
        order of the scatter begin and the local product depends on whether the scatter
        reports itself as "merged" (VecScatterGetMerged()):
          - not merged: begin scatter, local product, end scatter;
          - merged:     local product, begin scatter, end scatter.
     */
1013 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1016   PetscErrorCode ierr;
1017   PetscBool      merged;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1021   /* do nondiagonal part */
1022   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1023   if (!merged) {
1024     /* send it on its way */
1025     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1026     /* do local part */
1027     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1028     /* receive remote parts: note this assumes the values are not actually */
1029     /* added in yy until the next line, */
1030     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1031   } else {
1032     /* do local part */
1033     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1034     /* send it on its way */
1035     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036     /* values actually were received in the Begin() but we need to call this nop */
1037     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
     /*
        MatIsTranspose_MPIAIJ - Tests whether Bmat is the transpose of Amat, within tolerance tol.

        Input Parameters:
          Amat, Bmat - the two MPIAIJ matrices
          tol        - tolerance forwarded to MatIsTranspose() on the sequential pieces

        Output Parameter:
          f - result of the last MatIsTranspose() call that was executed

        The test is done in two stages: first the two diagonal blocks are compared; if that
        succeeds and the communicator has more than one process, the off-diagonal parts are
        extracted with MatGetSubMatrices() (rows owned here x all other indices for Amat, and
        the transposed selection for Bmat) and compared.

        NOTE(review): 'notme' is allocated, and the index set created, with N-last+first entries,
        while the second fill loop runs up to M; the two agree only when M == N - confirm that
        non-square matrices never reach this routine.
        NOTE(review): the early return on a failed diagonal-block test is decided per process,
        while MatGetSubMatrices() below is presumably collective - confirm ranks cannot diverge.
     */
1044 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1045 {
1046   MPI_Comm       comm;
1047   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1048   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1049   IS             Me,Notme;
1050   PetscErrorCode ierr;
1051   PetscInt       M,N,first,last,*notme,i;
1052   PetscMPIInt    size;
1053 
1054   PetscFunctionBegin;
1055   /* Easy test: symmetric diagonal block */
1056   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1057   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1058   if (!*f) PetscFunctionReturn(0);
1059   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1061   if (size == 1) PetscFunctionReturn(0);
1062 
1063   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1064   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1065   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
       /* notme = every index outside the locally owned range [first,last) */
1066   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1067   for (i=0; i<first; i++) notme[i] = i;
1068   for (i=last; i<M; i++) notme[i-last+first] = i;
       /* PETSC_COPY_VALUES: the index set keeps its own copy, so notme is freed separately below */
1069   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1070   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1071   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1072   Aoff = Aoffs[0];
1073   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1074   Boff = Boffs[0];
1075   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1076   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1077   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1078   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1079   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1080   ierr = PetscFree(notme);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
     /*
        MatMultTransposeAdd_MPIAIJ - Computes zz = yy + A^T * xx.

        The transpose product with block B goes into lvec and is reverse-scattered into zz
        with ADD_VALUES; the multtransposeadd of block A is issued between the begin and the
        end of that scatter.
     */
1086 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   PetscErrorCode ierr;
1089   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
1090 
1091   PetscFunctionBegin;
1092   /* off-diagonal block first: its result lands in the work vector lvec */
1093   ierr = (*mpiaij->B->ops->multtranspose)(mpiaij->B,xx,mpiaij->lvec);CHKERRQ(ierr);
1094   /* start adding lvec into zz */
1095   ierr = VecScatterBegin(mpiaij->Mvctx,mpiaij->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1096   /* diagonal block while the scatter is pending */
1097   ierr = (*mpiaij->A->ops->multtransposeadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr);
1098   /* finish the scatter */
1099   ierr = VecScatterEnd(mpiaij->Mvctx,mpiaij->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*
1104   Copies the diagonal into v by delegating to the local block A->A. Both the global sizes
1105    and the local row/column ownership ranges must match; each mismatch raises its own error.
1106 */
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1110 {
1111   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   if (A->cmap->N != A->rmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1116   if (A->cmap->rstart != A->rmap->rstart || A->cmap->rend != A->rmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1117   ierr = MatGetDiagonal(mpiaij->A,v);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatScale_MPIAIJ"
     /*
        MatScale_MPIAIJ - Scales the matrix by the scalar aa by scaling both
        sequential blocks, A first and then B.
     */
1123 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1124 {
1125   PetscErrorCode ierr;
1126   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
1127 
1128   PetscFunctionBegin;
1129   ierr = MatScale(mpiaij->A,aa);CHKERRQ(ierr);
1130   ierr = MatScale(mpiaij->B,aa);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatDestroy_MPIAIJ"
     /*
        MatDestroy_MPIAIJ - Releases everything owned by the MPIAIJ implementation of a matrix.

        Destroys the stash, the cached diagonal vector, the two sequential blocks, the column
        map (a PetscTable or a plain array depending on PETSC_USE_CTABLE), garray, the work
        vector and scatter used for products, the row work arrays and 'ld', then frees the
        implementation struct itself. Finally the type name is cleared and the functions
        composed on the object under the names listed below are removed.
     */
1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1137 {
1138   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142 #if defined(PETSC_USE_LOG)
1143   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1144 #endif
1145   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1146   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1147   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1148   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1149 #if defined(PETSC_USE_CTABLE)
1150   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1151 #else
1152   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1153 #endif
1154   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1156   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1157   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1158   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
       /* 'aij' aliases mat->data and must not be used after this line */
1159   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1160 
       /* clear the type name and remove the composed functions */
1161   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1169 #if defined(PETSC_HAVE_ELEMENTAL)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1171 #endif
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatView_MPIAIJ_Binary"
     /*
        MatView_MPIAIJ_Binary - Writes an MPIAIJ matrix to a binary viewer.

        Rank 0 does all the file writes. The file receives, in order:
          1. a header of four integers: MAT_FILE_CLASSID, global rows, global columns and the
             total number of nonzeros (sum over all processes);
          2. the length of every row, process by process;
          3. the global column indices, process by process;
          4. the numerical values, process by process.
        For sections 2-4 rank 0 first writes its own data and then receives the data of ranks
        1..size-1 one at a time under the viewer's flow control. Within each row the entries
        are emitted as: entries of B whose global column does not exceed cstart, then the
        entries of A (shifted by cstart), then the remaining entries of B.
        If the viewer has an info file, the option line "-matload_block_size <bs>" is appended to it.

        Fix relative to the previous revision: the "received too many entries" errors now
        report the received count (rnz) that triggered them instead of the local count (nz).

        NOTE(review): the first consistency check on 'cnt' uses PETSC_ERR_LIB while the
        identical check for the values uses PETSC_ERR_PLIB; left unchanged here.
     */
1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1178 {
1179   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1180   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1181   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1182   PetscErrorCode ierr;
1183   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1184   int            fd;
1185   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1186   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1187   PetscScalar    *column_values;
1188   PetscInt       message_count,flowcontrolcount;
1189   FILE           *file;
1190 
1191   PetscFunctionBegin;
1192   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1193   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1194   nz   = A->nz + B->nz;
1195   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1196   if (!rank) {
1197     header[0] = MAT_FILE_CLASSID;
1198     header[1] = mat->rmap->N;
1199     header[2] = mat->cmap->N;
1200 
1201     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1202     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1203     /* get largest number of rows any processor has */
1204     rlen  = mat->rmap->n;
1205     range = mat->rmap->range;
1206     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1207   } else {
1208     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1209     rlen = mat->rmap->n;
1210   }
1211 
1212   /* load up the local row counts */
1213   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1214   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1215 
1216   /* store the row lengths to the file */
1217   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1218   if (!rank) {
1219     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       rlen = range[i+1] - range[i];
1223       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1229     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1231   }
1232   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1233 
1234   /* load up the local column indices */
1235   nzmax = nz; /* rank 0 needs as much space as the largest process needs; other ranks keep their own nz */
1236   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1238   cnt   = 0;
1239   for (i=0; i<mat->rmap->n; i++) {
1240     for (j=B->i[i]; j<B->i[i+1]; j++) {
1241       if ((col = garray[B->j[j]]) > cstart) break;
1242       column_indices[cnt++] = col;
1243     }
1244     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1245     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1246   }
1247   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1248 
1249   /* store the column indices to the file */
1250   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1251   if (!rank) {
1252     MPI_Status status;
1253     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1254     for (i=1; i<size; i++) {
1255       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1256       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
           /* report the received count, which is the value that overflows the buffer */
1257       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1258       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1260     }
1261     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1262   } else {
1263     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1264     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1267   }
1268   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1269 
1270   /* load up the local column values */
1271   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1272   cnt  = 0;
1273   for (i=0; i<mat->rmap->n; i++) {
1274     for (j=B->i[i]; j<B->i[i+1]; j++) {
1275       if (garray[B->j[j]] > cstart) break;
1276       column_values[cnt++] = B->a[j];
1277     }
1278     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1279     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1280   }
1281   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1282 
1283   /* store the column values to the file */
1284   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1285   if (!rank) {
1286     MPI_Status status;
1287     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1288     for (i=1; i<size; i++) {
1289       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1290       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
           /* report the received count, which is the value that overflows the buffer */
1291       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1292       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1293       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1294     }
1295     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1296   } else {
1297     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1298     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1299     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1300     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1301   }
1302   ierr = PetscFree(column_values);CHKERRQ(ierr);
1303 
1304   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1305   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #include <petscdraw.h>
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
     /*
        MatView_MPIAIJ_ASCIIorDraworSocket - Views an MPIAIJ matrix with an ASCII, binary or draw viewer
        (any other viewer type falls through to the gather-and-view path at the end).

        Behaviour by viewer type / format:
          - ASCII, PETSC_VIEWER_ASCII_INFO_DETAIL: per-process sizes, nonzero counts of both
            blocks and the VecScatter used for products are printed; returns.
          - ASCII, PETSC_VIEWER_ASCII_INFO: reports whether I-node routines are in use; returns.
          - ASCII, PETSC_VIEWER_ASCII_FACTOR_INFO: nothing is printed; returns.
          - binary: the sequential block is viewed directly on one process, otherwise
            MatView_MPIAIJ_Binary() is used; returns.
          - draw: returns early when the PetscDraw is null.
          - everything else: a temporary MPIAIJ matrix whose rows all live on rank 0 is filled
            from the A and B blocks, assembled, and its local block is viewed on rank 0 through
            a sub-viewer.

        Fix relative to the previous revision: in the INFO_DETAIL branch the inode sizes were
        requested through a PetscBool variable cast to (PetscInt**), so a pointer was stored
        into an object of the wrong type and size. A real PetscInt pointer, initialized to
        NULL, is used instead. The outputs of the INFO branch are initialized as well so that
        they hold defined values should the query leave them untouched.
     */
1312 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   PetscErrorCode    ierr;
1316   PetscMPIInt       rank = aij->rank,size = aij->size;
1317   PetscBool         isdraw,iascii,isbinary;
1318   PetscViewer       sviewer;
1319   PetscViewerFormat format;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1323   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1325   if (iascii) {
1326     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1327     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1328       MatInfo   info;
1329       PetscInt  *inodes = NULL;
1330 
1331       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1332       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
           /* only the pointer is inspected below: non-NULL means inodes are in use */
1333       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1335       if (!inodes) {
1336         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1337                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1338       } else {
1339         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1340                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1341       }
1342       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1344       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1348       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1349       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1350       PetscFunctionReturn(0);
1351     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1352       PetscInt inodecount = 0,inodelimit = 0,*inodes = NULL;
1353       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1354       if (inodes) {
1355         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1356       } else {
1357         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1358       }
1359       PetscFunctionReturn(0);
1360     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1361       PetscFunctionReturn(0);
1362     }
1363   } else if (isbinary) {
1364     if (size == 1) {
1365       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1366       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1367     } else {
1368       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1369     }
1370     PetscFunctionReturn(0);
1371   } else if (isdraw) {
1372     PetscDraw draw;
1373     PetscBool isnull;
1374     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1375     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1376     if (isnull) PetscFunctionReturn(0);
1377   }
1378 
1379   {
1380     /* assemble the entire matrix onto first processor. */
1381     Mat        A;
1382     Mat_SeqAIJ *Aloc;
1383     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1384     MatScalar  *a;
1385 
1386     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1387     if (!rank) {
1388       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1389     } else {
1390       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1391     }
1392     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1393     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1394     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1395     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1396     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1397 
1398     /* copy over the A part */
1399     Aloc = (Mat_SeqAIJ*)aij->A->data;
1400     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1401     row  = mat->rmap->rstart;
         /* the column indices of the A block are shifted to global numbering in place and restored after the copy */
1402     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1403     for (i=0; i<m; i++) {
1404       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1405       row++;
1406       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1407     }
1408     aj = Aloc->j;
1409     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1410 
1411     /* copy over the B part */
1412     Aloc = (Mat_SeqAIJ*)aij->B->data;
1413     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1414     row  = mat->rmap->rstart;
1415     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
         /* ct remembers the start of the allocation because cols is advanced in the loop below */
1416     ct   = cols;
1417     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1418     for (i=0; i<m; i++) {
1419       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1420       row++;
1421       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1422     }
1423     ierr = PetscFree(ct);CHKERRQ(ierr);
1424     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426     /*
1427        Everyone has to call to draw the matrix since the graphics waits are
1428        synchronized across all processors that share the PetscDraw object
1429     */
1430     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1431     if (!rank) {
1432       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1433       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1434     }
1435     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1436     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1437     ierr = MatDestroy(&A);CHKERRQ(ierr);
1438   }
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ"
1444 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1445 {
1446   PetscErrorCode ierr;
1447   PetscBool      iascii,isdraw,issocket,isbinary;
1448 
1449   PetscFunctionBegin;
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1454   if (iascii || isdraw || isbinary || issocket) {
1455     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatSOR_MPIAIJ"
1462 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1463 {
1464   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1465   PetscErrorCode ierr;
1466   Vec            bb1 = 0;
1467   PetscBool      hasop;
1468 
1469   PetscFunctionBegin;
1470   if (flag == SOR_APPLY_UPPER) {
1471     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1472     PetscFunctionReturn(0);
1473   }
1474 
1475   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1476     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1477   }
1478 
1479   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1480     if (flag & SOR_ZERO_INITIAL_GUESS) {
1481       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482       its--;
1483     }
1484 
1485     while (its--) {
1486       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1488 
1489       /* update rhs: bb1 = bb - B*x */
1490       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1491       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1492 
1493       /* local sweep */
1494       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1495     }
1496   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1497     if (flag & SOR_ZERO_INITIAL_GUESS) {
1498       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1499       its--;
1500     }
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_EISENSTAT) {
1529     Vec xx1;
1530 
1531     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1532     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1533 
1534     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536     if (!mat->diag) {
1537       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1538       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1539     }
1540     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1541     if (hasop) {
1542       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1543     } else {
1544       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1545     }
1546     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1547 
1548     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1549 
1550     /* local sweep */
1551     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1552     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1553     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1554   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1555 
1556   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1557 
1558   matin->factorerrortype = mat->A->factorerrortype;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - Builds a new parallel AIJ matrix whose rows and columns
   are a permutation of those of A.

   Input Parameters:
+  A    - the matrix to permute
.  rowp - row index set; its local indices are used to build the row star forest
-  colp - column index set; its local indices are used to build the column star forest

   Output Parameter:
.  B - the newly created permuted matrix (same local and global sizes as A)

   Notes:
   Local row i of A is inserted into global row rdest[i] of the result; a column
   of the diagonal block goes to cdest[], a column of the off-diagonal block to
   gcdest[].  Preallocation counts are computed on the source rows and then sent
   to the processes owning the destination rows.
*/
1564 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1565 {
1566   Mat            aA,aB,Aperm;
1567   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1568   PetscScalar    *aa,*ba;
1569   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1570   PetscSF        rowsf,sf;
       /* parcolp is never assigned in this routine, so the ISDestroy() guarded by it at the end does not run */
1571   IS             parcolp = NULL;
       /* done is filled in by MatGetRowIJ()/MatRestoreRowIJ() but is not inspected here */
1572   PetscBool      done;
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1577   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1578   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
       /* work is shared by the row pass (m entries) and the column pass (n entries) */
1579   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1580 
1581   /* Invert row permutation to find out where my rows should go */
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
       /* each leaf carries its own global row number; the reduce deposits it in rdest[] on the owner of rwant[] */
1585   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1586   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1587   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1588 
1589   /* Invert column permutation to find out where my columns should go */
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1593   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1594   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1596   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1597 
1598   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1599   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
       /* aA/aB are the sequential diagonal and off-diagonal blocks; gcols maps aB's local columns to global columns */
1600   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1601 
1602   /* Find out where my gcols should go */
1603   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1604   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1605   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1606   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1607   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
       /* fetch, for every ghost column, the destination stored in cdest[] on the owning process */
1608   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
       /* dnnz/onnz: counts indexed by source row; tdnnz/tonnz: the same counts after being moved to the destination rows */
1612   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1613   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1614   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1615   for (i=0; i<m; i++) {
1616     PetscInt row = rdest[i],rowner;
1617     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
         /* an entry counts as "diagonal" when the destination row and destination column have the same owner */
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       PetscInt cowner,col = cdest[aj[j]];
1620       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1621       if (rowner == cowner) dnnz[i]++;
1622       else onnz[i]++;
1623     }
1624     for (j=bi[i]; j<bi[i+1]; j++) {
1625       PetscInt cowner,col = gcdest[bj[j]];
1626       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1627       if (rowner == cowner) dnnz[i]++;
1628       else onnz[i]++;
1629     }
1630   }
       /* ship the per-row counts through the row star forest so that they can be used as preallocation for Aperm */
1631   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1632   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1633   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1634   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1635   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1636 
1637   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1638   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1639   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) {
1641     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1642     PetscInt j0,rowlen;
1643     rowlen = ai[i+1] - ai[i];
         /* the scratch arrays hold only m entries, so each row is inserted in chunks of at most m columns */
1644     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1645       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1646       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1647     }
1648     rowlen = bi[i+1] - bi[i];
1649     for (j0=j=0; j<rowlen; j0=j) {
1650       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1651       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1652     }
1653   }
1654   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1655   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1656   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1657   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1658   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1659   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1660   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1661   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1662   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1663   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1664   *B = Aperm;
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1670 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1671 {
1672   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1677   if (ghosts) *ghosts = aij->garray;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1683 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1684 {
1685   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1686   Mat            A    = mat->A,B = mat->B;
1687   PetscErrorCode ierr;
1688   PetscReal      isend[5],irecv[5];
1689 
1690   PetscFunctionBegin;
1691   info->block_size = 1.0;
1692   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1693 
1694   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1695   isend[3] = info->memory;  isend[4] = info->mallocs;
1696 
1697   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1698 
1699   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1700   isend[3] += info->memory;  isend[4] += info->mallocs;
1701   if (flag == MAT_LOCAL) {
1702     info->nz_used      = isend[0];
1703     info->nz_allocated = isend[1];
1704     info->nz_unneeded  = isend[2];
1705     info->memory       = isend[3];
1706     info->mallocs      = isend[4];
1707   } else if (flag == MAT_GLOBAL_MAX) {
1708     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1709 
1710     info->nz_used      = irecv[0];
1711     info->nz_allocated = irecv[1];
1712     info->nz_unneeded  = irecv[2];
1713     info->memory       = irecv[3];
1714     info->mallocs      = irecv[4];
1715   } else if (flag == MAT_GLOBAL_SUM) {
1716     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1717 
1718     info->nz_used      = irecv[0];
1719     info->nz_allocated = irecv[1];
1720     info->nz_unneeded  = irecv[2];
1721     info->memory       = irecv[3];
1722     info->mallocs      = irecv[4];
1723   }
1724   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1725   info->fill_ratio_needed = 0;
1726   info->factor_mallocs    = 0;
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatSetOption_MPIAIJ"
1732 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1733 {
1734   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   switch (op) {
1739   case MAT_NEW_NONZERO_LOCATIONS:
1740   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1741   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1742   case MAT_KEEP_NONZERO_PATTERN:
1743   case MAT_NEW_NONZERO_LOCATION_ERR:
1744   case MAT_USE_INODES:
1745   case MAT_IGNORE_ZERO_ENTRIES:
1746     MatCheckPreallocated(A,1);
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_ROW_ORIENTED:
1751     MatCheckPreallocated(A,1);
1752     a->roworiented = flg;
1753 
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_NEW_DIAGONALS:
1758     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1759     break;
1760   case MAT_IGNORE_OFF_PROC_ENTRIES:
1761     a->donotstash = flg;
1762     break;
1763   case MAT_SPD:
1764     A->spd_set = PETSC_TRUE;
1765     A->spd     = flg;
1766     if (flg) {
1767       A->symmetric                  = PETSC_TRUE;
1768       A->structurally_symmetric     = PETSC_TRUE;
1769       A->symmetric_set              = PETSC_TRUE;
1770       A->structurally_symmetric_set = PETSC_TRUE;
1771     }
1772     break;
1773   case MAT_SYMMETRIC:
1774     MatCheckPreallocated(A,1);
1775     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1776     break;
1777   case MAT_STRUCTURALLY_SYMMETRIC:
1778     MatCheckPreallocated(A,1);
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_HERMITIAN:
1782     MatCheckPreallocated(A,1);
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_SYMMETRY_ETERNAL:
1786     MatCheckPreallocated(A,1);
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   default:
1790     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1791   }
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of a MATMPIAIJ matrix with
   global column indices in increasing order.

   Input Parameters:
+  matin - the matrix
-  row   - global row number; must satisfy rstart <= row < rend

   Output Parameters:
+  nz  - number of entries in the row (diagonal block plus off-diagonal block)
.  idx - if non-NULL, set to the global column indices (NULL for an empty row)
-  v   - if non-NULL, set to the values (NULL for an empty row)

   Notes:
   The returned arrays are the matrix's own rowindices/rowvalues work buffers;
   they are allocated on the first call that asks for idx or v and are sized for
   the longest local row.

   Only one row may be active at a time: getrowactive is set here and an error
   is raised if it is already set.  The flag is set before the row range is
   checked, so it remains set if the "Only local rows" error is raised.
*/
1797 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1798 {
1799   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1800   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1801   PetscErrorCode ierr;
1802   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1803   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1804   PetscInt       *cmap,*idx_p;
1805 
1806   PetscFunctionBegin;
1807   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1808   mat->getrowactive = PETSC_TRUE;
1809 
1810   if (!mat->rowvalues && (idx || v)) {
1811     /*
1812         allocate enough space to hold information from the longest row.
1813     */
1814     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
         /* max starts at 1, so at least one entry is requested even when every local row is empty */
1815     PetscInt   max = 1,tmp;
1816     for (i=0; i<matin->rmap->n; i++) {
1817       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1818       if (max < tmp) max = tmp;
1819     }
1820     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1821   }
1822 
1823   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1824   lrow = row - rstart;
1825 
       /* request only what the caller asked for; B's column indices are still fetched when only
          values are wanted, because they are needed below to locate the split point imark */
1826   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1827   if (!v)   {pvA = 0; pvB = 0;}
1828   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1829   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1830   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1831   nztot = nzA + nzB;
1832 
       /* cmap translates a local column of B into a global column */
1833   cmap = mat->garray;
1834   if (v  || idx) {
1835     if (nztot) {
1836       /* Sort by increasing column numbers, assuming A and B already sorted */
           /* imark = number of leading B entries whose global column is below cstart;
              output layout is [B entries 0..imark) | all A entries | B entries imark..nzB) */
1837       PetscInt imark = -1;
1838       if (v) {
1839         *v = v_p = mat->rowvalues;
1840         for (i=0; i<nzB; i++) {
1841           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1842           else break;
1843         }
1844         imark = i;
1845         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1846         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1847       }
1848       if (idx) {
1849         *idx = idx_p = mat->rowindices;
             /* reuse imark if the value pass above already computed it, otherwise find it now */
1850         if (imark > -1) {
1851           for (i=0; i<imark; i++) {
1852             idx_p[i] = cmap[cworkB[i]];
1853           }
1854         } else {
1855           for (i=0; i<nzB; i++) {
1856             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1857             else break;
1858           }
1859           imark = i;
1860         }
1861         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1862         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1863       }
1864     } else {
1865       if (idx) *idx = 0;
1866       if (v)   *v   = 0;
1867     }
1868   }
1869   *nz  = nztot;
       /* the block rows are released here; the merged copy in rowvalues/rowindices is what the caller sees */
1870   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1871   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1877 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1878 {
1879   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1880 
1881   PetscFunctionBegin;
1882   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1883   aij->getrowactive = PETSC_FALSE;
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatNorm_MPIAIJ"
1889 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1890 {
1891   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1892   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1893   PetscErrorCode ierr;
1894   PetscInt       i,j,cstart = mat->cmap->rstart;
1895   PetscReal      sum = 0.0;
1896   MatScalar      *v;
1897 
1898   PetscFunctionBegin;
1899   if (aij->size == 1) {
1900     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1901   } else {
1902     if (type == NORM_FROBENIUS) {
1903       v = amat->a;
1904       for (i=0; i<amat->nz; i++) {
1905         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1906       }
1907       v = bmat->a;
1908       for (i=0; i<bmat->nz; i++) {
1909         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1910       }
1911       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1912       *norm = PetscSqrtReal(*norm);
1913       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1914     } else if (type == NORM_1) { /* max column norm */
1915       PetscReal *tmp,*tmp2;
1916       PetscInt  *jj,*garray = aij->garray;
1917       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1918       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1919       *norm = 0.0;
1920       v     = amat->a; jj = amat->j;
1921       for (j=0; j<amat->nz; j++) {
1922         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1923       }
1924       v = bmat->a; jj = bmat->j;
1925       for (j=0; j<bmat->nz; j++) {
1926         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1927       }
1928       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1929       for (j=0; j<mat->cmap->N; j++) {
1930         if (tmp2[j] > *norm) *norm = tmp2[j];
1931       }
1932       ierr = PetscFree(tmp);CHKERRQ(ierr);
1933       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1934       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1935     } else if (type == NORM_INFINITY) { /* max row norm */
1936       PetscReal ntemp = 0.0;
1937       for (j=0; j<aij->A->rmap->n; j++) {
1938         v   = amat->a + amat->i[j];
1939         sum = 0.0;
1940         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1941           sum += PetscAbsScalar(*v); v++;
1942         }
1943         v = bmat->a + bmat->i[j];
1944         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1945           sum += PetscAbsScalar(*v); v++;
1946         }
1947         if (sum > ntemp) ntemp = sum;
1948       }
1949       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1950       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1951     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1952   }
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a MATMPIAIJ matrix.

   Input Parameters:
+  A     - the matrix
-  reuse - MAT_INITIAL_MATRIX to create the result, MAT_REUSE_MATRIX to fill
           the matrix already stored in *matout

   Output Parameter:
.  matout - the transpose; when *matout == A on entry the result is merged
            back into A at the end

   Notes:
   Each local row of A is inserted as a column of the result with
   MatSetValues(), first for the diagonal block and then for the off-diagonal
   block.  While the diagonal block is being copied its column index array is
   shifted in place to global numbering and shifted back afterwards.
*/
1958 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1959 {
1960   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1961   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1962   PetscErrorCode ierr;
1963   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1964   PetscInt       cstart = A->cmap->rstart,ncol;
1965   Mat            B;
1966   MatScalar      *array;
1967 
1968   PetscFunctionBegin;
1969   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1970 
1971   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1972   ai = Aloc->i; aj = Aloc->j;
1973   bi = Bloc->i; bj = Bloc->j;
       /* a fresh matrix is built both for MAT_INITIAL_MATRIX and for the in-place case */
1974   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1975     PetscInt             *d_nnz,*g_nnz,*o_nnz;
         /* oloc is allocated and freed together with the count arrays but is not otherwise used */
1976     PetscSFNode          *oloc;
1977     PETSC_UNUSED PetscSF sf;
1978 
1979     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1980     /* compute d_nnz for preallocation */
1981     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1982     for (i=0; i<ai[ma]; i++) {
1983       d_nnz[aj[i]]++;
1984       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1985     }
1986     /* compute local off-diagonal contributions */
1987     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1989     /* map those to global */
         /* the per-ghost-column counts are summed onto the processes that own those columns, giving o_nnz */
1990     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1991     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1992     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1993     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1994     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1995     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1996     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1997 
         /* the result swaps the row and column sizes (local and global) and block sizes of A */
1998     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1999     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2000     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2001     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2002     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2003     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2004   } else {
2005     B    = *matout;
2006     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2007     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2008   }
2009 
2010   /* copy over the A part */
2011   array = Aloc->a;
2012   row   = A->rmap->rstart;
2013   for (i=0; i<ma; i++) {
2014     ncol = ai[i+1]-ai[i];
         /* row i of A becomes column `row` of B: ncol row indices, one column index */
2015     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2016     row++;
2017     array += ncol; aj += ncol;
2018   }
2019   aj = Aloc->j;
2020   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2021 
2022   /* copy over the B part */
       /* unlike the A part, the global column indices are written to a scratch array rather than in place */
2023   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2024   array = Bloc->a;
2025   row   = A->rmap->rstart;
2026   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2027   cols_tmp = cols;
2028   for (i=0; i<mb; i++) {
2029     ncol = bi[i+1]-bi[i];
2030     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2031     row++;
2032     array += ncol; cols_tmp += ncol;
2033   }
2034   ierr = PetscFree(cols);CHKERRQ(ierr);
2035 
2036   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2037   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       /* hand B to the caller, or, for the in-place case, merge it into A */
2038   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2039     *matout = B;
2040   } else {
2041     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2048 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2049 {
2050   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2051   Mat            a    = aij->A,b = aij->B;
2052   PetscErrorCode ierr;
2053   PetscInt       s1,s2,s3;
2054 
2055   PetscFunctionBegin;
2056   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2057   if (rr) {
2058     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2059     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2060     /* Overlap communication with computation. */
2061     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2062   }
2063   if (ll) {
2064     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2065     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2066     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2067   }
2068   /* scale  the diagonal block */
2069   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2070 
2071   if (rr) {
2072     /* Do a scatter end and then right scale the off-diagonal block */
2073     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2074     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2081 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2082 {
2083   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2084   PetscErrorCode ierr;
2085 
2086   PetscFunctionBegin;
2087   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatEqual_MPIAIJ"
2093 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2094 {
2095   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2096   Mat            a,b,c,d;
2097   PetscBool      flg;
2098   PetscErrorCode ierr;
2099 
2100   PetscFunctionBegin;
2101   a = matA->A; b = matA->B;
2102   c = matB->A; d = matB->B;
2103 
2104   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2105   if (flg) {
2106     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2107   }
2108   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatCopy_MPIAIJ"
2114 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2115 {
2116   PetscErrorCode ierr;
2117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2118   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2119 
2120   PetscFunctionBegin;
2121   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2122   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2123     /* because of the column compression in the off-processor part of the matrix a->B,
2124        the number of columns in a->B and b->B may be different, hence we cannot call
2125        the MatCopy() directly on the two parts. If need be, we can provide a more
2126        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2127        then copying the submatrices */
2128     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2129   } else {
2130     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2131     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatSetUp_MPIAIJ"
2138 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2139 {
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*
2148    Computes the number of nonzeros per row needed for preallocation when X and Y
2149    have different nonzero structure.
2150 */
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2153 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2154 {
2155   PetscInt       i,j,k,nzx,nzy;
2156 
2157   PetscFunctionBegin;
2158   /* Set the number of nonzeros in the new matrix */
2159   for (i=0; i<m; i++) {
2160     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2161     nzx = xi[i+1] - xi[i];
2162     nzy = yi[i+1] - yi[i];
2163     nnz[i] = 0;
2164     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2165       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2166       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2167       nnz[i]++;
2168     }
2169     for (; k<nzy; k++) nnz[i]++;
2170   }
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2177 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2178 {
2179   PetscErrorCode ierr;
2180   PetscInt       m = Y->rmap->N;
2181   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2182   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2183 
2184   PetscFunctionBegin;
2185   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatAXPY_MPIAIJ"
2191 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2192 {
2193   PetscErrorCode ierr;
2194   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2195   PetscBLASInt   bnz,one=1;
2196   Mat_SeqAIJ     *x,*y;
2197 
2198   PetscFunctionBegin;
2199   if (str == SAME_NONZERO_PATTERN) {
2200     PetscScalar alpha = a;
2201     x    = (Mat_SeqAIJ*)xx->A->data;
2202     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2203     y    = (Mat_SeqAIJ*)yy->A->data;
2204     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2205     x    = (Mat_SeqAIJ*)xx->B->data;
2206     y    = (Mat_SeqAIJ*)yy->B->data;
2207     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2208     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2209     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2210   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2211     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2212   } else {
2213     Mat      B;
2214     PetscInt *nnz_d,*nnz_o;
2215     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2216     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2217     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2218     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2219     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2220     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2221     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2222     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2223     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2224     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2225     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2226     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2227     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2228     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatConjugate_MPIAIJ"
2237 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2238 {
2239 #if defined(PETSC_USE_COMPLEX)
2240   PetscErrorCode ierr;
2241   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2242 
2243   PetscFunctionBegin;
2244   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2245   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2246 #else
2247   PetscFunctionBegin;
2248 #endif
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatRealPart_MPIAIJ"
2254 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2255 {
2256   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2257   PetscErrorCode ierr;
2258 
2259   PetscFunctionBegin;
2260   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2261   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2267 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2268 {
2269   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2270   PetscErrorCode ierr;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2274   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2280 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2281 {
2282   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284   PetscInt       i,*idxb = 0;
2285   PetscScalar    *va,*vb;
2286   Vec            vtmp;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2290   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2291   if (idx) {
2292     for (i=0; i<A->rmap->n; i++) {
2293       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2294     }
2295   }
2296 
2297   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2298   if (idx) {
2299     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2300   }
2301   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2302   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2303 
2304   for (i=0; i<A->rmap->n; i++) {
2305     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2306       va[i] = vb[i];
2307       if (idx) idx[i] = a->garray[idxb[i]];
2308     }
2309   }
2310 
2311   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2312   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2313   ierr = PetscFree(idxb);CHKERRQ(ierr);
2314   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2320 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2321 {
2322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2323   PetscErrorCode ierr;
2324   PetscInt       i,*idxb = 0;
2325   PetscScalar    *va,*vb;
2326   Vec            vtmp;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2330   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2331   if (idx) {
2332     for (i=0; i<A->cmap->n; i++) {
2333       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2334     }
2335   }
2336 
2337   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2338   if (idx) {
2339     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2340   }
2341   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2342   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2343 
2344   for (i=0; i<A->rmap->n; i++) {
2345     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2346       va[i] = vb[i];
2347       if (idx) idx[i] = a->garray[idxb[i]];
2348     }
2349   }
2350 
2351   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2352   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2353   ierr = PetscFree(idxb);CHKERRQ(ierr);
2354   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2360 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361 {
2362   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2363   PetscInt       n      = A->rmap->n;
2364   PetscInt       cstart = A->cmap->rstart;
2365   PetscInt       *cmap  = mat->garray;
2366   PetscInt       *diagIdx, *offdiagIdx;
2367   Vec            diagV, offdiagV;
2368   PetscScalar    *a, *diagA, *offdiagA;
2369   PetscInt       r;
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2374   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2375   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2376   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2377   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2378   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2379   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2380   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2381   for (r = 0; r < n; ++r) {
2382     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2383       a[r]   = diagA[r];
2384       idx[r] = cstart + diagIdx[r];
2385     } else {
2386       a[r]   = offdiagA[r];
2387       idx[r] = cmap[offdiagIdx[r]];
2388     }
2389   }
2390   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2391   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2392   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2393   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2394   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2395   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2401 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2402 {
2403   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2404   PetscInt       n      = A->rmap->n;
2405   PetscInt       cstart = A->cmap->rstart;
2406   PetscInt       *cmap  = mat->garray;
2407   PetscInt       *diagIdx, *offdiagIdx;
2408   Vec            diagV, offdiagV;
2409   PetscScalar    *a, *diagA, *offdiagA;
2410   PetscInt       r;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2415   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2416   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2417   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2418   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2419   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2420   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2421   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2422   for (r = 0; r < n; ++r) {
2423     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2424       a[r]   = diagA[r];
2425       idx[r] = cstart + diagIdx[r];
2426     } else {
2427       a[r]   = offdiagA[r];
2428       idx[r] = cmap[offdiagIdx[r]];
2429     }
2430   }
2431   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2432   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2433   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2434   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2435   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2436   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2442 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2443 {
2444   PetscErrorCode ierr;
2445   Mat            *dummy;
2446 
2447   PetscFunctionBegin;
2448   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2449   *newmat = *dummy;
2450   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2456 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2457 {
2458   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2459   PetscErrorCode ierr;
2460 
2461   PetscFunctionBegin;
2462   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2463   A->factorerrortype = a->A->factorerrortype;
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2469 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2470 {
2471   PetscErrorCode ierr;
2472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2473 
2474   PetscFunctionBegin;
2475   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2476   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2477   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2478   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2484 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2485 {
2486   PetscFunctionBegin;
2487   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2488   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2494 /*@
2495    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2496 
2497    Collective on Mat
2498 
2499    Input Parameters:
2500 +    A - the matrix
2501 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2502 
2503  Level: advanced
2504 
2505 @*/
2506 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2507 {
2508   PetscErrorCode       ierr;
2509 
2510   PetscFunctionBegin;
2511   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2518 {
2519   PetscErrorCode       ierr;
2520   PetscBool            sc = PETSC_FALSE,flg;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2524   ierr = PetscObjectOptionsBegin((PetscObject)A);
2525     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2526     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2527     if (flg) {
2528       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2529     }
2530   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 #undef __FUNCT__
2535 #define __FUNCT__ "MatShift_MPIAIJ"
2536 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2537 {
2538   PetscErrorCode ierr;
2539   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2540   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2541 
2542   PetscFunctionBegin;
2543   if (!Y->preallocated) {
2544     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2545   } else if (!aij->nz) {
2546     PetscInt nonew = aij->nonew;
2547     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2548     aij->nonew = nonew;
2549   }
2550   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2556 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2557 {
2558   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2559   PetscErrorCode ierr;
2560 
2561   PetscFunctionBegin;
2562   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2563   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2564   if (d) {
2565     PetscInt rstart;
2566     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2567     *d += rstart;
2568 
2569   }
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 
2574 /* -------------------------------------------------------------------*/
2575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2576                                        MatGetRow_MPIAIJ,
2577                                        MatRestoreRow_MPIAIJ,
2578                                        MatMult_MPIAIJ,
2579                                 /* 4*/ MatMultAdd_MPIAIJ,
2580                                        MatMultTranspose_MPIAIJ,
2581                                        MatMultTransposeAdd_MPIAIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                 /*10*/ 0,
2586                                        0,
2587                                        0,
2588                                        MatSOR_MPIAIJ,
2589                                        MatTranspose_MPIAIJ,
2590                                 /*15*/ MatGetInfo_MPIAIJ,
2591                                        MatEqual_MPIAIJ,
2592                                        MatGetDiagonal_MPIAIJ,
2593                                        MatDiagonalScale_MPIAIJ,
2594                                        MatNorm_MPIAIJ,
2595                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2596                                        MatAssemblyEnd_MPIAIJ,
2597                                        MatSetOption_MPIAIJ,
2598                                        MatZeroEntries_MPIAIJ,
2599                                 /*24*/ MatZeroRows_MPIAIJ,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                 /*29*/ MatSetUp_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                        MatGetDiagonalBlock_MPIAIJ,
2608                                        0,
2609                                 /*34*/ MatDuplicate_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*39*/ MatAXPY_MPIAIJ,
2615                                        MatGetSubMatrices_MPIAIJ,
2616                                        MatIncreaseOverlap_MPIAIJ,
2617                                        MatGetValues_MPIAIJ,
2618                                        MatCopy_MPIAIJ,
2619                                 /*44*/ MatGetRowMax_MPIAIJ,
2620                                        MatScale_MPIAIJ,
2621                                        MatShift_MPIAIJ,
2622                                        MatDiagonalSet_MPIAIJ,
2623                                        MatZeroRowsColumns_MPIAIJ,
2624                                 /*49*/ MatSetRandom_MPIAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2630                                        0,
2631                                        MatSetUnfactored_MPIAIJ,
2632                                        MatPermute_MPIAIJ,
2633                                        0,
2634                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2635                                        MatDestroy_MPIAIJ,
2636                                        MatView_MPIAIJ,
2637                                        0,
2638                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2639                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2640                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2645                                        MatGetRowMinAbs_MPIAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*75*/ MatFDColoringApply_AIJ,
2651                                        MatSetFromOptions_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                        MatFindZeroDiagonals_MPIAIJ,
2655                                 /*80*/ 0,
2656                                        0,
2657                                        0,
2658                                 /*83*/ MatLoad_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2665                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2666                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2667                                        MatPtAP_MPIAIJ_MPIAIJ,
2668                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2669                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*99*/ 0,
2675                                        0,
2676                                        0,
2677                                        MatConjugate_MPIAIJ,
2678                                        0,
2679                                 /*104*/MatSetValuesRow_MPIAIJ,
2680                                        MatRealPart_MPIAIJ,
2681                                        MatImaginaryPart_MPIAIJ,
2682                                        0,
2683                                        0,
2684                                 /*109*/0,
2685                                        0,
2686                                        MatGetRowMin_MPIAIJ,
2687                                        0,
2688                                        MatMissingDiagonal_MPIAIJ,
2689                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2690                                        0,
2691                                        MatGetGhosts_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                 /*119*/0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        MatGetMultiProcBlock_MPIAIJ,
2699                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2700                                        MatGetColumnNorms_MPIAIJ,
2701                                        MatInvertBlockDiagonal_MPIAIJ,
2702                                        0,
2703                                        MatGetSubMatricesMPI_MPIAIJ,
2704                                 /*129*/0,
2705                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2706                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2707                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2708                                        0,
2709                                 /*134*/0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                 /*139*/MatSetBlockSizes_MPIAIJ,
2715                                        0,
2716                                        0,
2717                                        MatFDColoringSetUp_MPIXAIJ,
2718                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2719                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2720 };
2721 
2722 /* ----------------------------------------------------------------------------------------*/
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2726 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2739 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 #undef __FUNCT__
2751 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2752 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2753 {
2754   Mat_MPIAIJ     *b;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2759   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2760   b = (Mat_MPIAIJ*)B->data;
2761 
2762   if (!B->preallocated) {
2763     /* Explicitly create 2 MATSEQAIJ matrices. */
2764     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2765     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2766     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2767     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2768     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2769     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2770     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2771     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2772     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2773     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2774   }
2775 
2776   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2777   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2778   B->preallocated = PETSC_TRUE;
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2784 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2785 {
2786   Mat            mat;
2787   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   *newmat = 0;
2792   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2793   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2794   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2795   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2796   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2797   a       = (Mat_MPIAIJ*)mat->data;
2798 
2799   mat->factortype   = matin->factortype;
2800   mat->assembled    = PETSC_TRUE;
2801   mat->insertmode   = NOT_SET_VALUES;
2802   mat->preallocated = PETSC_TRUE;
2803 
2804   a->size         = oldmat->size;
2805   a->rank         = oldmat->rank;
2806   a->donotstash   = oldmat->donotstash;
2807   a->roworiented  = oldmat->roworiented;
2808   a->rowindices   = 0;
2809   a->rowvalues    = 0;
2810   a->getrowactive = PETSC_FALSE;
2811 
2812   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2813   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2814 
2815   if (oldmat->colmap) {
2816 #if defined(PETSC_USE_CTABLE)
2817     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2818 #else
2819     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2820     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2821     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2822 #endif
2823   } else a->colmap = 0;
2824   if (oldmat->garray) {
2825     PetscInt len;
2826     len  = oldmat->B->cmap->n;
2827     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2829     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2830   } else a->garray = 0;
2831 
2832   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2833   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2834   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2835   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2836   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2837   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2838   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2839   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2840   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2841   *newmat = mat;
2842   PetscFunctionReturn(0);
2843 }
2844 
2845 
2846 
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatLoad_MPIAIJ"
2849 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2850 {
2851   PetscScalar    *vals,*svals;
2852   MPI_Comm       comm;
2853   PetscErrorCode ierr;
2854   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2855   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2856   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2857   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2858   PetscInt       cend,cstart,n,*rowners;
2859   int            fd;
2860   PetscInt       bs = newMat->rmap->bs;
2861 
2862   PetscFunctionBegin;
2863   /* force binary viewer to load .info file if it has not yet done so */
2864   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2865   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2866   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2867   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2868   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2869   if (!rank) {
2870     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2871     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2872     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2873   }
2874 
2875   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2876   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2877   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2878   if (bs < 0) bs = 1;
2879 
2880   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2881   M    = header[1]; N = header[2];
2882 
2883   /* If global sizes are set, check if they are consistent with that given in the file */
2884   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2885   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2886 
2887   /* determine ownership of all (block) rows */
2888   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2889   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2890   else m = newMat->rmap->n; /* Set by user */
2891 
2892   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2893   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2894 
2895   /* First process needs enough room for process with most rows */
2896   if (!rank) {
2897     mmax = rowners[1];
2898     for (i=2; i<=size; i++) {
2899       mmax = PetscMax(mmax, rowners[i]);
2900     }
2901   } else mmax = -1;             /* unused, but compilers complain */
2902 
2903   rowners[0] = 0;
2904   for (i=2; i<=size; i++) {
2905     rowners[i] += rowners[i-1];
2906   }
2907   rstart = rowners[rank];
2908   rend   = rowners[rank+1];
2909 
2910   /* distribute row lengths to all processors */
2911   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2912   if (!rank) {
2913     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2914     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2915     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2916     for (j=0; j<m; j++) {
2917       procsnz[0] += ourlens[j];
2918     }
2919     for (i=1; i<size; i++) {
2920       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2921       /* calculate the number of nonzeros on each processor */
2922       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2923         procsnz[i] += rowlengths[j];
2924       }
2925       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2926     }
2927     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2928   } else {
2929     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2930   }
2931 
2932   if (!rank) {
2933     /* determine max buffer needed and allocate it */
2934     maxnz = 0;
2935     for (i=0; i<size; i++) {
2936       maxnz = PetscMax(maxnz,procsnz[i]);
2937     }
2938     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2939 
2940     /* read in my part of the matrix column indices  */
2941     nz   = procsnz[0];
2942     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2943     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2944 
2945     /* read in every one elses and ship off */
2946     for (i=1; i<size; i++) {
2947       nz   = procsnz[i];
2948       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2949       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2950     }
2951     ierr = PetscFree(cols);CHKERRQ(ierr);
2952   } else {
2953     /* determine buffer space needed for message */
2954     nz = 0;
2955     for (i=0; i<m; i++) {
2956       nz += ourlens[i];
2957     }
2958     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2959 
2960     /* receive message of column indices*/
2961     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2962   }
2963 
2964   /* determine column ownership if matrix is not square */
2965   if (N != M) {
2966     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2967     else n = newMat->cmap->n;
2968     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2969     cstart = cend - n;
2970   } else {
2971     cstart = rstart;
2972     cend   = rend;
2973     n      = cend - cstart;
2974   }
2975 
2976   /* loop over local rows, determining number of off diagonal entries */
2977   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2978   jj   = 0;
2979   for (i=0; i<m; i++) {
2980     for (j=0; j<ourlens[i]; j++) {
2981       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2982       jj++;
2983     }
2984   }
2985 
2986   for (i=0; i<m; i++) {
2987     ourlens[i] -= offlens[i];
2988   }
2989   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2990 
2991   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2992 
2993   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2994 
2995   for (i=0; i<m; i++) {
2996     ourlens[i] += offlens[i];
2997   }
2998 
2999   if (!rank) {
3000     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3001 
3002     /* read in my part of the matrix numerical values  */
3003     nz   = procsnz[0];
3004     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3005 
3006     /* insert into matrix */
3007     jj      = rstart;
3008     smycols = mycols;
3009     svals   = vals;
3010     for (i=0; i<m; i++) {
3011       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3012       smycols += ourlens[i];
3013       svals   += ourlens[i];
3014       jj++;
3015     }
3016 
3017     /* read in other processors and ship out */
3018     for (i=1; i<size; i++) {
3019       nz   = procsnz[i];
3020       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3021       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3022     }
3023     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3024   } else {
3025     /* receive numeric values */
3026     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3027 
3028     /* receive message of values*/
3029     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3030 
3031     /* insert into matrix */
3032     jj      = rstart;
3033     smycols = mycols;
3034     svals   = vals;
3035     for (i=0; i<m; i++) {
3036       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3037       smycols += ourlens[i];
3038       svals   += ourlens[i];
3039       jj++;
3040     }
3041   }
3042   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3043   ierr = PetscFree(vals);CHKERRQ(ierr);
3044   ierr = PetscFree(mycols);CHKERRQ(ierr);
3045   ierr = PetscFree(rowners);CHKERRQ(ierr);
3046   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3047   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3053 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3054 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3055 {
3056   PetscErrorCode ierr;
3057   IS             iscol_local;
3058   PetscInt       csize;
3059 
3060   PetscFunctionBegin;
3061   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3062   if (call == MAT_REUSE_MATRIX) {
3063     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3064     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3065   } else {
3066     /* check if we are grabbing all columns*/
3067     PetscBool    isstride;
3068     PetscMPIInt  lisstride = 0,gisstride;
3069     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3070     if (isstride) {
3071       PetscInt  start,len,mstart,mlen;
3072       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3073       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3074       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3075       if (mstart == start && mlen-mstart == len) lisstride = 1;
3076     }
3077     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3078     if (gisstride) {
3079       PetscInt N;
3080       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3081       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3082       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3083       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3084     } else {
3085       PetscInt cbs;
3086       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3087       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3088       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3089     }
3090   }
3091   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3092   if (call == MAT_INITIAL_MATRIX) {
3093     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3094     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3095   }
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3100 #undef __FUNCT__
3101 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3102 /*
3103     Not great since it makes two copies of the submatrix, first an SeqAIJ
3104   in local and then by concatenating the local matrices the end result.
3105   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3106 
3107   Note: This requires a sequential iscol with all indices.
3108 */
3109 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3110 {
3111   PetscErrorCode ierr;
3112   PetscMPIInt    rank,size;
3113   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3114   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3115   PetscBool      allcolumns, colflag;
3116   Mat            M,Mreuse;
3117   MatScalar      *vwork,*aa;
3118   MPI_Comm       comm;
3119   Mat_SeqAIJ     *aij;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3123   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3125 
3126   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3127   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3128   if (colflag && ncol == mat->cmap->N) {
3129     allcolumns = PETSC_TRUE;
3130     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3131   } else {
3132     allcolumns = PETSC_FALSE;
3133   }
3134   if (call ==  MAT_REUSE_MATRIX) {
3135     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3136     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3137     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3138   } else {
3139     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3140   }
3141 
3142   /*
3143       m - number of local rows
3144       n - number of columns (same on all processors)
3145       rstart - first row in new global matrix generated
3146   */
3147   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3148   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3149   if (call == MAT_INITIAL_MATRIX) {
3150     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3151     ii  = aij->i;
3152     jj  = aij->j;
3153 
3154     /*
3155         Determine the number of non-zeros in the diagonal and off-diagonal
3156         portions of the matrix in order to do correct preallocation
3157     */
3158 
3159     /* first get start and end of "diagonal" columns */
3160     if (csize == PETSC_DECIDE) {
3161       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3162       if (mglobal == n) { /* square matrix */
3163         nlocal = m;
3164       } else {
3165         nlocal = n/size + ((n % size) > rank);
3166       }
3167     } else {
3168       nlocal = csize;
3169     }
3170     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3171     rstart = rend - nlocal;
3172     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3173 
3174     /* next, compute all the lengths */
3175     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3176     olens = dlens + m;
3177     for (i=0; i<m; i++) {
3178       jend = ii[i+1] - ii[i];
3179       olen = 0;
3180       dlen = 0;
3181       for (j=0; j<jend; j++) {
3182         if (*jj < rstart || *jj >= rend) olen++;
3183         else dlen++;
3184         jj++;
3185       }
3186       olens[i] = olen;
3187       dlens[i] = dlen;
3188     }
3189     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3190     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3191     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3192     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3193     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3194     ierr = PetscFree(dlens);CHKERRQ(ierr);
3195   } else {
3196     PetscInt ml,nl;
3197 
3198     M    = *newmat;
3199     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3200     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3201     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3202     /*
3203          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3204        rather than the slower MatSetValues().
3205     */
3206     M->was_assembled = PETSC_TRUE;
3207     M->assembled     = PETSC_FALSE;
3208   }
3209   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3210   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3211   ii   = aij->i;
3212   jj   = aij->j;
3213   aa   = aij->a;
3214   for (i=0; i<m; i++) {
3215     row   = rstart + i;
3216     nz    = ii[i+1] - ii[i];
3217     cwork = jj;     jj += nz;
3218     vwork = aa;     aa += nz;
3219     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3220   }
3221 
3222   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3223   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3224   *newmat = M;
3225 
3226   /* save submatrix used in processor for next request */
3227   if (call ==  MAT_INITIAL_MATRIX) {
3228     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3229     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3230   }
3231   PetscFunctionReturn(0);
3232 }
3233 
3234 #undef __FUNCT__
3235 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3236 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3237 {
3238   PetscInt       m,cstart, cend,j,nnz,i,d;
3239   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3240   const PetscInt *JJ;
3241   PetscScalar    *values;
3242   PetscErrorCode ierr;
3243 
3244   PetscFunctionBegin;
3245   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3246 
3247   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3248   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3249   m      = B->rmap->n;
3250   cstart = B->cmap->rstart;
3251   cend   = B->cmap->rend;
3252   rstart = B->rmap->rstart;
3253 
3254   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3255 
3256 #if defined(PETSC_USE_DEBUGGING)
3257   for (i=0; i<m; i++) {
3258     nnz = Ii[i+1]- Ii[i];
3259     JJ  = J + Ii[i];
3260     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3261     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3262     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3263   }
3264 #endif
3265 
3266   for (i=0; i<m; i++) {
3267     nnz     = Ii[i+1]- Ii[i];
3268     JJ      = J + Ii[i];
3269     nnz_max = PetscMax(nnz_max,nnz);
3270     d       = 0;
3271     for (j=0; j<nnz; j++) {
3272       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3273     }
3274     d_nnz[i] = d;
3275     o_nnz[i] = nnz - d;
3276   }
3277   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3278   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3279 
3280   if (v) values = (PetscScalar*)v;
3281   else {
3282     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3283   }
3284 
3285   for (i=0; i<m; i++) {
3286     ii   = i + rstart;
3287     nnz  = Ii[i+1]- Ii[i];
3288     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3289   }
3290   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3291   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292 
3293   if (!v) {
3294     ierr = PetscFree(values);CHKERRQ(ierr);
3295   }
3296   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3302 /*@
3303    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3304    (the default parallel PETSc format).
3305 
3306    Collective on MPI_Comm
3307 
3308    Input Parameters:
3309 +  B - the matrix
3310 .  i - the indices into j for the start of each local row (starts with zero)
3311 .  j - the column indices for each local row (starts with zero)
3312 -  v - optional values in the matrix
3313 
3314    Level: developer
3315 
3316    Notes:
3317        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3318      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3319      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3320 
3321        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3322 
3323        The format which is used for the sparse matrix input, is equivalent to a
3324     row-major ordering.. i.e for the following matrix, the input data expected is
3325     as shown
3326 
3327 $        1 0 0
3328 $        2 0 3     P0
3329 $       -------
3330 $        4 5 6     P1
3331 $
3332 $     Process0 [P0]: rows_owned=[0,1]
3333 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3334 $        j =  {0,0,2}  [size = 3]
3335 $        v =  {1,2,3}  [size = 3]
3336 $
3337 $     Process1 [P1]: rows_owned=[2]
3338 $        i =  {0,3}    [size = nrow+1  = 1+1]
3339 $        j =  {0,1,2}  [size = 3]
3340 $        v =  {4,5,6}  [size = 3]
3341 
3342 .keywords: matrix, aij, compressed row, sparse, parallel
3343 
3344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3345           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3346 @*/
3347 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3348 {
3349   PetscErrorCode ierr;
3350 
3351   PetscFunctionBegin;
3352   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 #undef __FUNCT__
3357 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3358 /*@C
3359    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3360    (the default parallel PETSc format).  For good matrix assembly performance
3361    the user should preallocate the matrix storage by setting the parameters
3362    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3363    performance can be increased by more than a factor of 50.
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  B - the matrix
3369 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3370            (same value is used for all local rows)
3371 .  d_nnz - array containing the number of nonzeros in the various rows of the
3372            DIAGONAL portion of the local submatrix (possibly different for each row)
3373            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3374            The size of this array is equal to the number of local rows, i.e 'm'.
3375            For matrices that will be factored, you must leave room for (and set)
3376            the diagonal entry even if it is zero.
3377 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3378            submatrix (same value is used for all local rows).
3379 -  o_nnz - array containing the number of nonzeros in the various rows of the
3380            OFF-DIAGONAL portion of the local submatrix (possibly different for
3381            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3382            structure. The size of this array is equal to the number
3383            of local rows, i.e 'm'.
3384 
3385    If the *_nnz parameter is given then the *_nz parameter is ignored
3386 
3387    The AIJ format (also called the Yale sparse matrix format or
3388    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3389    storage.  The stored row and column indices begin with zero.
3390    See Users-Manual: ch_mat for details.
3391 
3392    The parallel matrix is partitioned such that the first m0 rows belong to
3393    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3394    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3395 
3396    The DIAGONAL portion of the local submatrix of a processor can be defined
3397    as the submatrix which is obtained by extracting the part corresponding to
3398    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3399    first row that belongs to the processor, r2 is the last row belonging to
3400    this processor, and c1-c2 is the range of indices of the local part of a
3401    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3402    common case of a square matrix, the row and column ranges are the same and
3403    the DIAGONAL part is also square. The remaining portion of the local
3404    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3405 
3406    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3407 
3408    You can call MatGetInfo() to get information on how effective the preallocation was;
3409    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3410    You can also run with the option -info and look for messages with the string
3411    malloc in them to see if additional memory allocation was needed.
3412 
3413    Example usage:
3414 
3415    Consider the following 8x8 matrix with 34 non-zero values, that is
3416    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3417    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3418    as follows:
3419 
3420 .vb
3421             1  2  0  |  0  3  0  |  0  4
3422     Proc0   0  5  6  |  7  0  0  |  8  0
3423             9  0 10  | 11  0  0  | 12  0
3424     -------------------------------------
3425            13  0 14  | 15 16 17  |  0  0
3426     Proc1   0 18  0  | 19 20 21  |  0  0
3427             0  0  0  | 22 23  0  | 24  0
3428     -------------------------------------
3429     Proc2  25 26 27  |  0  0 28  | 29  0
3430            30  0  0  | 31 32 33  |  0 34
3431 .ve
3432 
3433    This can be represented as a collection of submatrices as:
3434 
3435 .vb
3436       A B C
3437       D E F
3438       G H I
3439 .ve
3440 
3441    Where the submatrices A,B,C are owned by proc0, D,E,F are
3442    owned by proc1, G,H,I are owned by proc2.
3443 
3444    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3445    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3446    The 'M','N' parameters are 8,8, and have the same values on all procs.
3447 
3448    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3449    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3450    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3451    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3452    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3453    matrix, and [DF] as another SeqAIJ matrix.
3454 
3455    When d_nz, o_nz parameters are specified, d_nz storage elements are
3456    allocated for every row of the local diagonal submatrix, and o_nz
3457    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3458    One way to choose d_nz and o_nz is to use the max nonzeros per local
3459    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3460    In this case, the values of d_nz,o_nz are:
3461 .vb
3462      proc0 : d_nz = 2, o_nz = 2
3463      proc1 : d_nz = 3, o_nz = 2
3464      proc2 : d_nz = 1, o_nz = 4
3465 .ve
3466    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3467    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3468    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3469    34 values.
3470 
3471    When d_nnz, o_nnz parameters are specified, the storage is specified
3472    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3473    In the above case the values for d_nnz,o_nnz are:
3474 .vb
3475      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3476      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3477      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3478 .ve
3479    Here the space allocated is sum of all the above values i.e 34, and
3480    hence pre-allocation is perfect.
3481 
3482    Level: intermediate
3483 
3484 .keywords: matrix, aij, compressed row, sparse, parallel
3485 
3486 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3487           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3488 @*/
3489 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3490 {
3491   PetscErrorCode ierr;
3492 
3493   PetscFunctionBegin;
3494   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3495   PetscValidType(B,1);
3496   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3497   PetscFunctionReturn(0);
3498 }
3499 
3500 #undef __FUNCT__
3501 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3502 /*@
3503      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3504          CSR format the local rows.
3505 
3506    Collective on MPI_Comm
3507 
3508    Input Parameters:
3509 +  comm - MPI communicator
3510 .  m - number of local rows (Cannot be PETSC_DECIDE)
3511 .  n - This value should be the same as the local size used in creating the
3512        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3513        calculated if N is given) For square matrices n is almost always m.
3514 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3515 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3516 .   i - row indices
3517 .   j - column indices
3518 -   a - matrix values
3519 
3520    Output Parameter:
3521 .   mat - the matrix
3522 
3523    Level: intermediate
3524 
3525    Notes:
3526        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3527      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3528      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3529 
3530        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3531 
3532        The format which is used for the sparse matrix input, is equivalent to a
3533     row-major ordering.. i.e for the following matrix, the input data expected is
3534     as shown
3535 
3536 $        1 0 0
3537 $        2 0 3     P0
3538 $       -------
3539 $        4 5 6     P1
3540 $
3541 $     Process0 [P0]: rows_owned=[0,1]
3542 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3543 $        j =  {0,0,2}  [size = 3]
3544 $        v =  {1,2,3}  [size = 3]
3545 $
3546 $     Process1 [P1]: rows_owned=[2]
3547 $        i =  {0,3}    [size = nrow+1  = 1+1]
3548 $        j =  {0,1,2}  [size = 3]
3549 $        v =  {4,5,6}  [size = 3]
3550 
3551 .keywords: matrix, aij, compressed row, sparse, parallel
3552 
3553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3554           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3555 @*/
3556 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3557 {
3558   PetscErrorCode ierr;
3559 
3560   PetscFunctionBegin;
3561   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3562   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3563   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3564   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3565   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3566   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3567   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 #undef __FUNCT__
3572 #define __FUNCT__ "MatCreateAIJ"
3573 /*@C
3574    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3575    (the default parallel PETSc format).  For good matrix assembly performance
3576    the user should preallocate the matrix storage by setting the parameters
3577    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3578    performance can be increased by more than a factor of 50.
3579 
3580    Collective on MPI_Comm
3581 
3582    Input Parameters:
3583 +  comm - MPI communicator
3584 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3585            This value should be the same as the local size used in creating the
3586            y vector for the matrix-vector product y = Ax.
3587 .  n - This value should be the same as the local size used in creating the
3588        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3589        calculated if N is given) For square matrices n is almost always m.
3590 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3591 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3592 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3593            (same value is used for all local rows)
3594 .  d_nnz - array containing the number of nonzeros in the various rows of the
3595            DIAGONAL portion of the local submatrix (possibly different for each row)
3596            or NULL, if d_nz is used to specify the nonzero structure.
3597            The size of this array is equal to the number of local rows, i.e 'm'.
3598 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3599            submatrix (same value is used for all local rows).
3600 -  o_nnz - array containing the number of nonzeros in the various rows of the
3601            OFF-DIAGONAL portion of the local submatrix (possibly different for
3602            each row) or NULL, if o_nz is used to specify the nonzero
3603            structure. The size of this array is equal to the number
3604            of local rows, i.e 'm'.
3605 
3606    Output Parameter:
3607 .  A - the matrix
3608 
3609    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3610    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3611    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3612 
3613    Notes:
3614    If the *_nnz parameter is given then the *_nz parameter is ignored
3615 
3616    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3617    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3618    storage requirements for this matrix.
3619 
3620    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3621    processor then it must be used on all processors that share the object for
3622    that argument.
3623 
3624    The user MUST specify either the local or global matrix dimensions
3625    (possibly both).
3626 
3627    The parallel matrix is partitioned across processors such that the
3628    first m0 rows belong to process 0, the next m1 rows belong to
3629    process 1, the next m2 rows belong to process 2 etc.. where
3630    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3631    values corresponding to [m x N] submatrix.
3632 
3633    The columns are logically partitioned with the n0 columns belonging
3634    to 0th partition, the next n1 columns belonging to the next
3635    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3636 
3637    The DIAGONAL portion of the local submatrix on any given processor
3638    is the submatrix corresponding to the rows and columns m,n
3639    corresponding to the given processor. i.e diagonal matrix on
3640    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3641    etc. The remaining portion of the local submatrix [m x (N-n)]
3642    constitute the OFF-DIAGONAL portion. The example below better
3643    illustrates this concept.
3644 
3645    For a square global matrix we define each processor's diagonal portion
3646    to be its local rows and the corresponding columns (a square submatrix);
3647    each processor's off-diagonal portion encompasses the remainder of the
3648    local matrix (a rectangular submatrix).
3649 
3650    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3651 
3652    When calling this routine with a single process communicator, a matrix of
3653    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3654    type of communicator, use the construction mechanism:
3655      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3656 
3657    By default, this format uses inodes (identical nodes) when possible.
3658    We search for consecutive rows with the same nonzero structure, thereby
3659    reusing matrix information to achieve increased efficiency.
3660 
3661    Options Database Keys:
3662 +  -mat_no_inode  - Do not use inodes
3663 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3664 -  -mat_aij_oneindex - Internally use indexing starting at 1
3665         rather than 0.  Note that when calling MatSetValues(),
3666         the user still MUST index entries starting at 0!
3667 
3668 
3669    Example usage:
3670 
3671    Consider the following 8x8 matrix with 34 non-zero values, that is
3672    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3673    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3674    as follows:
3675 
3676 .vb
3677             1  2  0  |  0  3  0  |  0  4
3678     Proc0   0  5  6  |  7  0  0  |  8  0
3679             9  0 10  | 11  0  0  | 12  0
3680     -------------------------------------
3681            13  0 14  | 15 16 17  |  0  0
3682     Proc1   0 18  0  | 19 20 21  |  0  0
3683             0  0  0  | 22 23  0  | 24  0
3684     -------------------------------------
3685     Proc2  25 26 27  |  0  0 28  | 29  0
3686            30  0  0  | 31 32 33  |  0 34
3687 .ve
3688 
3689    This can be represented as a collection of submatrices as:
3690 
3691 .vb
3692       A B C
3693       D E F
3694       G H I
3695 .ve
3696 
3697    Where the submatrices A,B,C are owned by proc0, D,E,F are
3698    owned by proc1, G,H,I are owned by proc2.
3699 
3700    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702    The 'M','N' parameters are 8,8, and have the same values on all procs.
3703 
3704    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3705    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3706    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3707    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3708    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3709    matrix, and [DF] as another SeqAIJ matrix.
3710 
3711    When d_nz, o_nz parameters are specified, d_nz storage elements are
3712    allocated for every row of the local diagonal submatrix, and o_nz
3713    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3714    One way to choose d_nz and o_nz is to use the max nonzeros per local
3715    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3716    In this case, the values of d_nz,o_nz are:
3717 .vb
3718      proc0 : d_nz = 2, o_nz = 2
3719      proc1 : d_nz = 3, o_nz = 2
3720      proc2 : d_nz = 1, o_nz = 4
3721 .ve
3722    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3723    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3724    for proc2. i.e. we are using 12+15+10=37 storage locations to store
3725    34 values.
3726 
3727    When d_nnz, o_nnz parameters are specified, the storage is specified
3728    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3729    In the above case the values for d_nnz,o_nnz are:
3730 .vb
3731      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3732      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3733      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3734 .ve
3735    Here the space allocated is sum of all the above values i.e 34, and
3736    hence pre-allocation is perfect.
3737 
3738    Level: intermediate
3739 
3740 .keywords: matrix, aij, compressed row, sparse, parallel
3741 
3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3743           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3744 @*/
3745 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3746 {
3747   PetscMPIInt    nproc;
3748   PetscErrorCode ierr;
3749 
3750   PetscFunctionBegin;
3751   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3752   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3753   ierr = MPI_Comm_size(comm,&nproc);CHKERRQ(ierr);
  /* the concrete type follows the communicator size; the sequential case uses only d_nz/d_nnz */
3754   if (nproc <= 1) {
3755     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3756     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3757   } else {
3758     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3759     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3760   }
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/*
   MatMPIAIJGetSeqAIJ - hands back the internal pieces of a MATMPIAIJ matrix.

   Input Parameter:
.  A - the matrix; an error is raised unless its type is MATMPIAIJ

   Output Parameters (each may be NULL, in which case it is skipped):
+  Ad     - set to the A member of the Mat_MPIAIJ data (presumably the diagonal block - confirm)
.  Ao     - set to the B member of the Mat_MPIAIJ data (presumably the off-diagonal block - confirm)
-  colmap - set to the garray member of the Mat_MPIAIJ data

   The pointers are copied as-is; no reference is taken and nothing is duplicated here.
*/
3766 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3767 {
3768   PetscErrorCode ierr;
3769   PetscBool      ismpiaij;
3770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
3771 
3772   PetscFunctionBegin;
3773   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
3774   if (!ismpiaij) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3775   if (Ad)     *Ad     = aij->A;
3776   if (Ao)     *Ao     = aij->B;
3777   if (colmap) *colmap = aij->garray;
3778   PetscFunctionReturn(0);
3779 }
3780 
3781 #undef __FUNCT__
3782 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - builds a MATMPIAIJ matrix whose local rows on each
   process are the rows of that process's sequential matrix.

   Collective on comm (uses MPIU_Allreduce and MPI_Scan in the symbolic phase).

   Input Parameters:
+  comm  - communicator of the parallel matrix
.  inmat - the sequential matrix on this process; its rows are read with MatGetRow_SeqAIJ()
.  n     - number of local columns, or PETSC_DECIDE to have PetscSplitOwnership() choose it
-  scall - MAT_INITIAL_MATRIX to create and preallocate outmat; for any other value the
           existing *outmat is reused and only the numeric phase runs

   Output Parameter:
.  outmat - the parallel matrix, assembled on return

   Notes:
   The local column counts must sum to the column count of inmat, otherwise
   PETSC_ERR_ARG_INCOMP is raised.
*/
3783 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3784 {
3785   PetscErrorCode ierr;
3786   PetscInt       m,N,i,rstart,nnz,Ii;
3787   PetscInt       *indx;
3788   PetscScalar    *values;
3789 
3790   PetscFunctionBegin;
3791   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3792   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3793     PetscInt       *dnz,*onz,sum,bs,cbs;
3794 
3795     if (n == PETSC_DECIDE) {
3796       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3797     }
3798     /* Check sum(n) = N */
3799     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3800     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N); /* N is a PetscInt, so it is printed with %D, not %d */
3801 
    /* inclusive prefix sum of the local row counts, minus our own count, gives our first global row */
3802     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3803     rstart -= m;
3804 
    /* count the nonzeros of every local row to preallocate outmat */
3805     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3806     for (i=0; i<m; i++) {
3807       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3808       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3809       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3810     }
3811 
3812     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3813     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3814     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3815     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3816     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3817     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3818     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3819   }
3820 
3821   /* numeric phase */
  /* copy each local row of inmat into global row i+rstart of outmat */
3822   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3823   for (i=0; i<m; i++) {
3824     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3825     Ii   = i + rstart;
3826     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3827     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3828   }
3829   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 #undef __FUNCT__
3835 #define __FUNCT__ "MatFileSplit"
/*
   MatFileSplit - writes the locally owned rows of a matrix to a per-process binary file.

   Input Parameters:
+  A       - the matrix whose local rows are written
-  outfile - base name of the output; each process writes to the file "<outfile>.<rank>",
             where rank is its rank in the communicator of A

   Notes:
   The local rows are first copied into a temporary sequential MATSEQAIJ matrix with m rows
   (local row count of A) and N columns (global column count of A).  That matrix is viewed
   through a binary viewer opened on PETSC_COMM_SELF with FILE_MODE_APPEND and then destroyed.
*/
3836 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3837 {
3838   PetscErrorCode    ierr;
3839   PetscMPIInt       rank;
3840   PetscInt          m,N,i,rstart,nnz;
3841   size_t            len;
3842   const PetscInt    *indx;
3843   PetscViewer       out;
3844   char              *name;
3845   Mat               B;
3846   const PetscScalar *values;
3847 
3848   PetscFunctionBegin;
3849   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
3850   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
3851   /* Should this be the type of the diagonal block of A? */
3852   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3853   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3854   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3855   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3856   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3857   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  /* global row i+rstart of A becomes local row i of B */
3858   for (i=0; i<m; i++) {
3859     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3860     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3861     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3862   }
3863   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865 
3866   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3867   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* 16 extra bytes hold '.', the decimal digits of any int rank and the terminating NUL;
     the previous len+5 with an unbounded sprintf() overflowed for ranks of four or more digits */
3868   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr);
3869   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
3870   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3871   ierr = PetscFree(name);CHKERRQ(ierr);
3872   ierr = MatView(B,out);CHKERRQ(ierr);
3873   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3874   ierr = MatDestroy(&B);CHKERRQ(ierr);
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/*
   MatDestroy_MPIAIJ_SeqsToMPI - destroy routine that releases the Mat_Merge_SeqsToMPI data
   attached to A before delegating to MatDestroy_MPIAIJ().

   Input Parameter:
.  A - the matrix being destroyed

   Notes:
   The merge data is looked up in the container composed with A under the name
   "MatMergeSeqsToMPI"; when no such container is found only MatDestroy_MPIAIJ() is called.
*/
3881 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3882 {
3883   PetscErrorCode      ierr;
3884   Mat_Merge_SeqsToMPI *merge;
3885   PetscContainer      container;
3886 
3887   PetscFunctionBegin;
3888   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3889   if (container) {
3890     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3891     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3892     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3893     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    /* entry [0] is freed before the pointer array itself; presumably the per-message
       buffers live in one allocation that starts at entry [0] - confirm */
3896     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3903     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3904     ierr = PetscFree(merge);CHKERRQ(ierr);
    /* compose a null object under the same name once the struct is gone (presumably detaches the container - confirm) */
3905     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3906   }
  /* the regular MPIAIJ teardown always runs */
3907   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 #include <../src/mat/utils/freespace.h>
3912 #include <petscbt.h>
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of summing the sequential matrices held by
   each process into the parallel matrix mpimat.

   Input Parameters:
+  seqmat - the sequential matrix on this process; its data is accessed as Mat_SeqAIJ
-  mpimat - the parallel matrix whose values are computed; the merge data (message lengths,
            received index buffers, bi/bj row structure) is read from the container composed
            with it under the name "MatMergeSeqsToMPI"

   Notes:
   For every process with a nonzero send length, the seqmat values starting at that process's
   first owned row are sent to it.  Each local row of mpimat is then set to the sum of this
   process's own seqmat row and the matching received rows, and mpimat is assembled.

   NOTE(review): the container returned by PetscObjectQuery() is passed to
   PetscContainerGetPointer() without a NULL check, so mpimat is presumably required to come
   from MatCreateMPIAIJSumSeqAIJSymbolic() - confirm.
*/
3916 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3917 {
3918   PetscErrorCode      ierr;
3919   MPI_Comm            comm;
3920   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3921   PetscMPIInt         size,rank,taga,*len_s;
3922   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3923   PetscInt            proc,m;
3924   PetscInt            **buf_ri,**buf_rj;
3925   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3926   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3927   MPI_Request         *s_waits,*r_waits;
3928   MPI_Status          *status;
3929   MatScalar           *aa=a->a;
3930   MatScalar           **abuf_r,*ba_i;
3931   Mat_Merge_SeqsToMPI *merge;
3932   PetscContainer      container;
3933 
3934   PetscFunctionBegin;
3935   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3936   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3937 
3938   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3939   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3940 
3941   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3942   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3943 
3944   bi     = merge->bi;
3945   bj     = merge->bj;
3946   buf_ri = merge->buf_ri;
3947   buf_rj = merge->buf_rj;
3948 
  /* one status array of length size is shared by both MPI_Waitall() calls below */
3949   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3950   owners = merge->rowmap->range;
3951   len_s  = merge->len_s;
3952 
3953   /* send and recv matrix values */
3954   /*-----------------------------*/
3955   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3956   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3957 
3958   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  /* k counts the sends actually posted; processes with a zero send length are skipped */
3959   for (proc=0,k=0; proc<size; proc++) {
3960     if (!len_s[proc]) continue;
3961     i    = owners[proc];
    /* len_s[proc] values, starting at the first entry of seqmat row owners[proc] */
3962     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3963     k++;
3964   }
3965 
3966   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3967   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3968   ierr = PetscFree(status);CHKERRQ(ierr);
3969 
3970   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3971   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3972 
3973   /* insert mat values of mpimat */
3974   /*----------------------------*/
  /* ba_i is a scratch row of length N (global column count of mpimat) reused for every local row */
3975   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3976   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3977 
3978   for (k=0; k<merge->nrecv; k++) {
3979     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3980     nrows       = *(buf_ri_k[k]);
3981     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3982     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
3983   }
3984 
3985   /* set values of ba */
3986   m = merge->rowmap->n;
3987   for (i=0; i<m; i++) {
3988     arow = owners[rank] + i;
3989     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3990     bnzi = bi[i+1] - bi[i];
    /* NOTE(review): ba_i is declared MatScalar* but is zeroed using sizeof(PetscScalar);
       presumably the two types have the same size - confirm */
3991     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3992 
3993     /* add local non-zero vals of this proc's seqmat into ba */
3994     anzi   = ai[arow+1] - ai[arow];
3995     aj     = a->j + ai[arow];
3996     aa     = a->a + ai[arow];
3997     nextaj = 0;
    /* j walks the merged row's columns and nextaj the source row's; the loop ends only once
       all anzi source entries have been matched, so every source column is presumably
       present in bj_i in the same order - confirm */
3998     for (j=0; nextaj<anzi; j++) {
3999       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4000         ba_i[j] += aa[nextaj++];
4001       }
4002     }
4003 
4004     /* add received vals into ba */
4005     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4006       /* i-th row */
4007       if (i == *nextrow[k]) {
4008         anzi   = *(nextai[k]+1) - *nextai[k];
4009         aj     = buf_rj[k] + *(nextai[k]);
4010         aa     = abuf_r[k] + *(nextai[k]);
4011         nextaj = 0;
4012         for (j=0; nextaj<anzi; j++) {
4013           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4014             ba_i[j] += aa[nextaj++];
4015           }
4016         }
        /* this message's row i is consumed; advance to its next row entry and row offset */
4017         nextrow[k]++; nextai[k]++;
4018       }
4019     }
4020     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4021   }
4022   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4023   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4024 
4025   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4026   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4027   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4028   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4029   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4030   PetscFunctionReturn(0);
4031 }
4032 
4033 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4034 
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of summing the sequential matrices held by
   each process into a parallel MATMPIAIJ matrix.

   Input Parameters:
+  comm   - communicator of the parallel matrix
.  seqmat - the sequential matrix on this process; its data is accessed as Mat_SeqAIJ
.  m      - local row count handed to the row layout (overwritten with the layout's local size)
-  n      - local column count; when it is PETSC_DECIDE the global column count is set to the
            column count of seqmat instead

   Output Parameter:
.  mpimat - the new matrix, preallocated but not assembled

   Notes:
   The column indices and row offsets of the seqmat rows owned by other processes are exchanged,
   the merged column pattern of every local row is accumulated (bi, bj), and the matrix is
   preallocated from it.  The message bookkeeping and bi/bj are stored in a
   Mat_Merge_SeqsToMPI struct attached to the matrix in a container composed under the name
   "MatMergeSeqsToMPI", and the matrix's destroy operation is set to
   MatDestroy_MPIAIJ_SeqsToMPI().
*/
4037 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4038 {
4039   PetscErrorCode      ierr;
4040   Mat                 B_mpi;
4041   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4042   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4043   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4044   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4045   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4046   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4047   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4048   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4049   MPI_Status          *status;
4050   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4051   PetscBT             lnkbt;
4052   Mat_Merge_SeqsToMPI *merge;
4053   PetscContainer      container;
4054 
4055   PetscFunctionBegin;
4056   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4057 
4058   /* make sure it is a PETSc comm */
  /* comm is replaced by the result of PetscCommDuplicate(); it is handed to PetscCommDestroy() near the end */
4059   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4062 
4063   ierr = PetscNew(&merge);CHKERRQ(ierr);
4064   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4065 
4066   /* determine row ownership */
4067   /*---------------------------------------------------------*/
  /* the layout's global size M is the row count of seqmat */
4068   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4069   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4070   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4071   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4072   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4073   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4074   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4075 
4076   m      = merge->rowmap->n;
4077   owners = merge->rowmap->range;
4078 
4079   /* determine the number of messages to send, their lengths */
4080   /*---------------------------------------------------------*/
4081   len_s = merge->len_s;
4082 
4083   len          = 0; /* length of buf_si[] */
4084   merge->nsend = 0;
4085   for (proc=0; proc<size; proc++) {
4086     len_si[proc] = 0;
4087     if (proc == rank) {
4088       len_s[proc] = 0;
4089     } else {
      /* provisional value; replaced below whenever something is sent to proc */
4090       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4091       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (column indices) to be sent to [proc] */
4092     }
4093     if (len_s[proc]) {
4094       merge->nsend++;
4095       nrows = 0;
      /* only rows with at least one nonzero are listed in the i-structure message */
4096       for (i=owners[proc]; i<owners[proc+1]; i++) {
4097         if (ai[i+1] > ai[i]) nrows++;
4098       }
4099       len_si[proc] = 2*(nrows+1);
4100       len         += len_si[proc];
4101     }
4102   }
4103 
4104   /* determine the number and length of messages to receive for ij-structure */
4105   /*-------------------------------------------------------------------------*/
4106   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4107   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4108 
4109   /* post the Irecv of j-structure */
4110   /*-------------------------------*/
4111   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4112   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4113 
4114   /* post the Isend of j-structure */
4115   /*--------------------------------*/
4116   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4117 
4118   for (proc=0, k=0; proc<size; proc++) {
4119     if (!len_s[proc]) continue;
4120     i    = owners[proc];
    /* the column indices of the rows owned by proc are sent straight from seqmat's j array */
4121     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4122     k++;
4123   }
4124 
4125   /* receives and sends of j-structure are complete */
4126   /*------------------------------------------------*/
4127   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4128   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4129 
4130   /* send and recv i-structure */
4131   /*---------------------------*/
4132   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4133   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4134 
4135   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4136   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4137   for (proc=0,k=0; proc<size; proc++) {
4138     if (!len_s[proc]) continue;
4139     /* form outgoing message for i-structure:
4140          buf_si[0]:                 nrows to be sent
4141                [1:nrows]:           row index (local to the receiving process, i.e. i-owners[proc])
4142                [nrows+1:2*nrows+1]: i-structure index
4143     */
4144     /*-------------------------------------------*/
4145     nrows       = len_si[proc]/2 - 1;
4146     buf_si_i    = buf_si + nrows+1;
4147     buf_si[0]   = nrows;
4148     buf_si_i[0] = 0;
4149     nrows       = 0;
4150     for (i=owners[proc]; i<owners[proc+1]; i++) {
4151       anzi = ai[i+1] - ai[i];
4152       if (anzi) {
4153         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4154         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4155         nrows++;
4156       }
4157     }
4158     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4159     k++;
4160     buf_si += len_si[proc];
4161   }
4162 
4163   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4164   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4165 
  /* NOTE(review): len_ri is declared PetscMPIInt* above but is printed with %D below, which is
     presumably the PetscInt format; the specifier likely should be %d - confirm (the same
     question applies to the other arguments of these two calls) */
4166   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4167   for (i=0; i<merge->nrecv; i++) {
4168     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4169   }
4170 
4171   ierr = PetscFree(len_si);CHKERRQ(ierr);
4172   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4173   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4174   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4175   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4176   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4177   ierr = PetscFree(status);CHKERRQ(ierr);
4178 
4179   /* compute a local seq matrix in each processor */
4180   /*----------------------------------------------*/
4181   /* allocate bi array and free space for accumulating nonzero column info */
4182   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4183   bi[0] = 0;
4184 
4185   /* create and initialize a linked list */
4186   nlnk = N+1;
4187   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4188 
4189   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4190   len  = ai[owners[rank+1]] - ai[owners[rank]];
4191   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4192 
4193   current_space = free_space;
4194 
4195   /* determine symbolic info for each local row */
4196   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4197 
4198   for (k=0; k<merge->nrecv; k++) {
4199     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4200     nrows       = *buf_ri_k[k];
4201     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4202     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4203   }
4204 
4205   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4206   len  = 0;
4207   for (i=0; i<m; i++) {
4208     bnzi = 0;
4209     /* add local non-zero cols of this proc's seqmat into lnk */
4210     arow  = owners[rank] + i;
4211     anzi  = ai[arow+1] - ai[arow];
4212     aj    = a->j + ai[arow];
    /* presumably PetscLLAddSorted() leaves in nlnk the number of columns newly added to the list - confirm */
4213     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4214     bnzi += nlnk;
4215     /* add received col data into lnk */
4216     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4217       if (i == *nextrow[k]) { /* i-th row */
4218         anzi  = *(nextai[k]+1) - *nextai[k];
4219         aj    = buf_rj[k] + *nextai[k];
4220         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4221         bnzi += nlnk;
4222         nextrow[k]++; nextai[k]++;
4223       }
4224     }
    /* NOTE(review): len is not read again after this loop in this function */
4225     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4226 
4227     /* if free space is not available, make more free space */
4228     if (current_space->local_remaining<bnzi) {
4229       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      /* NOTE(review): nspacedouble is only incremented, never read, in this function */
4230       nspacedouble++;
4231     }
4232     /* copy data into free space, then initialize lnk */
4233     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4234     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4235 
4236     current_space->array           += bnzi;
4237     current_space->local_used      += bnzi;
4238     current_space->local_remaining -= bnzi;
4239 
4240     bi[i+1] = bi[i] + bnzi;
4241   }
4242 
4243   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4244 
  /* gather the per-row column lists from the free-space chain into the single array bj */
4245   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4246   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4247   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4248 
4249   /* create symbolic parallel matrix B_mpi */
4250   /*---------------------------------------*/
4251   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4252   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  /* with n left as PETSC_DECIDE the global column count N of seqmat is given instead */
4253   if (n==PETSC_DECIDE) {
4254     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4255   } else {
4256     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4257   }
4258   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4259   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4260   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4261   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4262   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4263 
4264   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4265   B_mpi->assembled    = PETSC_FALSE;
4266   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4267   merge->bi           = bi;
4268   merge->bj           = bj;
4269   merge->buf_ri       = buf_ri;
4270   merge->buf_rj       = buf_rj;
4271   merge->coi          = NULL;
4272   merge->coj          = NULL;
4273   merge->owners_co    = NULL;
4274 
4275   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4276 
4277   /* attach the supporting struct to B_mpi for reuse */
4278   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4279   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4280   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4281   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4282   *mpimat = B_mpi;
4283 
4284   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4285   PetscFunctionReturn(0);
4286 }
4287 
4288 #undef __FUNCT__
4289 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4290 /*@C
4291       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4292                  matrices from each processor
4293 
4294     Collective on MPI_Comm
4295 
4296    Input Parameters:
4297 +    comm - the communicators the parallel matrix will live on
4298 .    seqmat - the input sequential matrices
4299 .    m - number of local rows (or PETSC_DECIDE)
4300 .    n - number of local columns (or PETSC_DECIDE)
4301 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4302 
4303    Output Parameter:
4304 .    mpimat - the parallel matrix generated
4305 
4306     Level: advanced
4307 
4308    Notes:
4309      The dimensions of the sequential matrix in each processor MUST be the same.
4310      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4311      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4312 @*/
4313 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4314 {
4315   PetscErrorCode ierr;
4316   PetscMPIInt    size;
4317 
4318   PetscFunctionBegin;
4319   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4320   if (size == 1) {
4321     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4322     if (scall == MAT_INITIAL_MATRIX) {
4323       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4324     } else {
4325       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4326     }
4327     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4328     PetscFunctionReturn(0);
4329   }
4330   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4331   if (scall == MAT_INITIAL_MATRIX) {
4332     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4333   }
4334   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4335   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4336   PetscFunctionReturn(0);
4337 }
4338 
4339 #undef __FUNCT__
4340 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4341 /*@
4342      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4343           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4344           with MatGetSize()
4345 
4346     Not Collective
4347 
4348    Input Parameters:
4349 +    A - the matrix
4350 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4351 
4352    Output Parameter:
4353 .    A_loc - the local sequential matrix generated
4354 
4355     Level: developer
4356 
4357 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4358 
4359 @*/
4360 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4361 {
4362   PetscErrorCode ierr;
4363   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4364   Mat_SeqAIJ     *mat,*a,*b;
4365   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4366   MatScalar      *aa,*ba,*cam;
4367   PetscScalar    *ca;
4368   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4369   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4370   PetscBool      match;
4371   MPI_Comm       comm;
4372   PetscMPIInt    size;
4373 
4374   PetscFunctionBegin;
4375   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4376   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4377   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4378   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4379   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4380 
4381   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4382   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4383   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4384   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4385   aa = a->a; ba = b->a;
4386   if (scall == MAT_INITIAL_MATRIX) {
4387     if (size == 1) {
4388       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4389       PetscFunctionReturn(0);
4390     }
4391 
4392     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4393     ci[0] = 0;
4394     for (i=0; i<am; i++) {
4395       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4396     }
4397     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4398     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4399     k    = 0;
4400     for (i=0; i<am; i++) {
4401       ncols_o = bi[i+1] - bi[i];
4402       ncols_d = ai[i+1] - ai[i];
4403       /* off-diagonal portion of A */
4404       for (jo=0; jo<ncols_o; jo++) {
4405         col = cmap[*bj];
4406         if (col >= cstart) break;
4407         cj[k]   = col; bj++;
4408         ca[k++] = *ba++;
4409       }
4410       /* diagonal portion of A */
4411       for (j=0; j<ncols_d; j++) {
4412         cj[k]   = cstart + *aj++;
4413         ca[k++] = *aa++;
4414       }
4415       /* off-diagonal portion of A */
4416       for (j=jo; j<ncols_o; j++) {
4417         cj[k]   = cmap[*bj++];
4418         ca[k++] = *ba++;
4419       }
4420     }
4421     /* put together the new matrix */
4422     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4423     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4424     /* Since these are PETSc arrays, change flags to free them as necessary. */
4425     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4426     mat->free_a  = PETSC_TRUE;
4427     mat->free_ij = PETSC_TRUE;
4428     mat->nonew   = 0;
4429   } else if (scall == MAT_REUSE_MATRIX) {
4430     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4431     ci = mat->i; cj = mat->j; cam = mat->a;
4432     for (i=0; i<am; i++) {
4433       /* off-diagonal portion of A */
4434       ncols_o = bi[i+1] - bi[i];
4435       for (jo=0; jo<ncols_o; jo++) {
4436         col = cmap[*bj];
4437         if (col >= cstart) break;
4438         *cam++ = *ba++; bj++;
4439       }
4440       /* diagonal portion of A */
4441       ncols_d = ai[i+1] - ai[i];
4442       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4443       /* off-diagonal portion of A */
4444       for (j=jo; j<ncols_o; j++) {
4445         *cam++ = *ba++; bj++;
4446       }
4447     }
4448   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4449   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4455 /*@C
4456      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4457 
4458     Not Collective
4459 
4460    Input Parameters:
4461 +    A - the matrix
4462 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4463 -    row, col - index sets of rows and columns to extract (or NULL)
4464 
4465    Output Parameter:
4466 .    A_loc - the local sequential matrix generated
4467 
4468     Level: developer
4469 
4470 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4471 
4472 @*/
4473 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4474 {
4475   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4476   PetscErrorCode ierr;
4477   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4478   IS             isrowa,iscola;
4479   Mat            *aloc;
4480   PetscBool      match;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4484   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4485   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4486   if (!row) {
4487     start = A->rmap->rstart; end = A->rmap->rend;
4488     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4489   } else {
4490     isrowa = *row;
4491   }
4492   if (!col) {
4493     start = A->cmap->rstart;
4494     cmap  = a->garray;
4495     nzA   = a->A->cmap->n;
4496     nzB   = a->B->cmap->n;
4497     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4498     ncols = 0;
4499     for (i=0; i<nzB; i++) {
4500       if (cmap[i] < start) idx[ncols++] = cmap[i];
4501       else break;
4502     }
4503     imark = i;
4504     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4505     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4506     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4507   } else {
4508     iscola = *col;
4509   }
4510   if (scall != MAT_INITIAL_MATRIX) {
4511     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4512     aloc[0] = *A_loc;
4513   }
4514   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4515   *A_loc = aloc[0];
4516   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4517   if (!row) {
4518     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4519   }
4520   if (!col) {
4521     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4522   }
4523   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatGetBrowsOfAcols"
4529 /*@C
4530     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4531 
4532     Collective on Mat
4533 
4534    Input Parameters:
4535 +    A,B - the matrices in mpiaij format
4536 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4537 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4538 
4539    Output Parameter:
4540 +    rowb, colb - index sets of rows and columns of B to extract
4541 -    B_seq - the sequential matrix generated
4542 
4543     Level: developer
4544 
4545 @*/
4546 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4547 {
4548   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4549   PetscErrorCode ierr;
4550   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4551   IS             isrowb,iscolb;
4552   Mat            *bseq=NULL;
4553 
4554   PetscFunctionBegin;
4555   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4556     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4557   }
4558   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4559 
4560   if (scall == MAT_INITIAL_MATRIX) {
4561     start = A->cmap->rstart;
4562     cmap  = a->garray;
4563     nzA   = a->A->cmap->n;
4564     nzB   = a->B->cmap->n;
4565     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4566     ncols = 0;
4567     for (i=0; i<nzB; i++) {  /* row < local row index */
4568       if (cmap[i] < start) idx[ncols++] = cmap[i];
4569       else break;
4570     }
4571     imark = i;
4572     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4573     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4574     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4575     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4576   } else {
4577     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4578     isrowb  = *rowb; iscolb = *colb;
4579     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4580     bseq[0] = *B_seq;
4581   }
4582   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4583   *B_seq = bseq[0];
4584   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4585   if (!rowb) {
4586     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4587   } else {
4588     *rowb = isrowb;
4589   }
4590   if (!colb) {
4591     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4592   } else {
4593     *colb = iscolb;
4594   }
4595   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 #undef __FUNCT__
4600 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4601 /*
4602     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4603     of the OFF-DIAGONAL portion of local A
4604 
4605     Collective on Mat
4606 
4607    Input Parameters:
4608 +    A,B - the matrices in mpiaij format
4609 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4610 
4611    Output Parameter:
4612 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4613 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4614 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4615 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4616 
4617     Level: developer
4618 
4619 */
4620 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4621 {
4622   VecScatter_MPI_General *gen_to,*gen_from;
4623   PetscErrorCode         ierr;
4624   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4625   Mat_SeqAIJ             *b_oth;
4626   VecScatter             ctx =a->Mvctx;
4627   MPI_Comm               comm;
4628   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4629   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4630   PetscScalar            *rvalues,*svalues;
4631   MatScalar              *b_otha,*bufa,*bufA;
4632   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4633   MPI_Request            *rwaits = NULL,*swaits = NULL;
4634   MPI_Status             *sstatus,rstatus;
4635   PetscMPIInt            jj,size;
4636   PetscInt               *cols,sbs,rbs;
4637   PetscScalar            *vals;
4638 
4639   PetscFunctionBegin;
4640   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4641   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4642 
4643   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4644     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4645   }
4646   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4647   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4648 
4649   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4650   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4651   rvalues  = gen_from->values; /* holds the length of receiving row */
4652   svalues  = gen_to->values;   /* holds the length of sending row */
4653   nrecvs   = gen_from->n;
4654   nsends   = gen_to->n;
4655 
4656   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4657   srow    = gen_to->indices;    /* local row index to be sent */
4658   sstarts = gen_to->starts;
4659   sprocs  = gen_to->procs;
4660   sstatus = gen_to->sstatus;
4661   sbs     = gen_to->bs;
4662   rstarts = gen_from->starts;
4663   rprocs  = gen_from->procs;
4664   rbs     = gen_from->bs;
4665 
4666   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4667   if (scall == MAT_INITIAL_MATRIX) {
4668     /* i-array */
4669     /*---------*/
4670     /*  post receives */
4671     for (i=0; i<nrecvs; i++) {
4672       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4673       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4674       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4675     }
4676 
4677     /* pack the outgoing message */
4678     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4679 
4680     sstartsj[0] = 0;
4681     rstartsj[0] = 0;
4682     len         = 0; /* total length of j or a array to be sent */
4683     k           = 0;
4684     for (i=0; i<nsends; i++) {
4685       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4686       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4687       for (j=0; j<nrows; j++) {
4688         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4689         for (l=0; l<sbs; l++) {
4690           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4691 
4692           rowlen[j*sbs+l] = ncols;
4693 
4694           len += ncols;
4695           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4696         }
4697         k++;
4698       }
4699       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4700 
4701       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4702     }
4703     /* recvs and sends of i-array are completed */
4704     i = nrecvs;
4705     while (i--) {
4706       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4707     }
4708     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4709 
4710     /* allocate buffers for sending j and a arrays */
4711     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4712     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4713 
4714     /* create i-array of B_oth */
4715     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4716 
4717     b_othi[0] = 0;
4718     len       = 0; /* total length of j or a array to be received */
4719     k         = 0;
4720     for (i=0; i<nrecvs; i++) {
4721       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4722       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4723       for (j=0; j<nrows; j++) {
4724         b_othi[k+1] = b_othi[k] + rowlen[j];
4725         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4726         k++;
4727       }
4728       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4729     }
4730 
4731     /* allocate space for j and a arrrays of B_oth */
4732     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4733     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4734 
4735     /* j-array */
4736     /*---------*/
4737     /*  post receives of j-array */
4738     for (i=0; i<nrecvs; i++) {
4739       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4740       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4741     }
4742 
4743     /* pack the outgoing message j-array */
4744     k = 0;
4745     for (i=0; i<nsends; i++) {
4746       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4747       bufJ  = bufj+sstartsj[i];
4748       for (j=0; j<nrows; j++) {
4749         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4750         for (ll=0; ll<sbs; ll++) {
4751           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4752           for (l=0; l<ncols; l++) {
4753             *bufJ++ = cols[l];
4754           }
4755           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4756         }
4757       }
4758       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4759     }
4760 
4761     /* recvs and sends of j-array are completed */
4762     i = nrecvs;
4763     while (i--) {
4764       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4765     }
4766     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4767   } else if (scall == MAT_REUSE_MATRIX) {
4768     sstartsj = *startsj_s;
4769     rstartsj = *startsj_r;
4770     bufa     = *bufa_ptr;
4771     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4772     b_otha   = b_oth->a;
4773   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4774 
4775   /* a-array */
4776   /*---------*/
4777   /*  post receives of a-array */
4778   for (i=0; i<nrecvs; i++) {
4779     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4780     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4781   }
4782 
4783   /* pack the outgoing message a-array */
4784   k = 0;
4785   for (i=0; i<nsends; i++) {
4786     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4787     bufA  = bufa+sstartsj[i];
4788     for (j=0; j<nrows; j++) {
4789       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4790       for (ll=0; ll<sbs; ll++) {
4791         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4792         for (l=0; l<ncols; l++) {
4793           *bufA++ = vals[l];
4794         }
4795         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4796       }
4797     }
4798     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4799   }
4800   /* recvs and sends of a-array are completed */
4801   i = nrecvs;
4802   while (i--) {
4803     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4804   }
4805   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4806   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4807 
4808   if (scall == MAT_INITIAL_MATRIX) {
4809     /* put together the new matrix */
4810     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4811 
4812     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4813     /* Since these are PETSc arrays, change flags to free them as necessary. */
4814     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4815     b_oth->free_a  = PETSC_TRUE;
4816     b_oth->free_ij = PETSC_TRUE;
4817     b_oth->nonew   = 0;
4818 
4819     ierr = PetscFree(bufj);CHKERRQ(ierr);
4820     if (!startsj_s || !bufa_ptr) {
4821       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4822       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4823     } else {
4824       *startsj_s = sstartsj;
4825       *startsj_r = rstartsj;
4826       *bufa_ptr  = bufa;
4827     }
4828   }
4829   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4830   PetscFunctionReturn(0);
4831 }
4832 
4833 #undef __FUNCT__
4834 #define __FUNCT__ "MatGetCommunicationStructs"
4835 /*@C
4836   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4837 
4838   Not Collective
4839 
4840   Input Parameters:
4841 . A - The matrix in mpiaij format
4842 
4843   Output Parameter:
4844 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4845 . colmap - A map from global column index to local index into lvec
4846 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4847 
4848   Level: developer
4849 
4850 @*/
4851 #if defined(PETSC_USE_CTABLE)
4852 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4853 #else
4854 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4855 #endif
4856 {
4857   Mat_MPIAIJ *a;
4858 
4859   PetscFunctionBegin;
4860   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4861   PetscValidPointer(lvec, 2);
4862   PetscValidPointer(colmap, 3);
4863   PetscValidPointer(multScatter, 4);
4864   a = (Mat_MPIAIJ*) A->data;
4865   if (lvec) *lvec = a->lvec;
4866   if (colmap) *colmap = a->colmap;
4867   if (multScatter) *multScatter = a->Mvctx;
4868   PetscFunctionReturn(0);
4869 }
4870 
4871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4872 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4874 #if defined(PETSC_HAVE_ELEMENTAL)
4875 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4876 #endif
4877 
4878 #undef __FUNCT__
4879 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4880 /*
4881     Computes (B'*A')' since computing B*A directly is untenable
4882 
4883                n                       p                          p
4884         (              )       (              )         (                  )
4885       m (      A       )  *  n (       B      )   =   m (         C        )
4886         (              )       (              )         (                  )
4887 
4888 */
4889 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4890 {
4891   PetscErrorCode ierr;
4892   Mat            At,Bt,Ct;
4893 
4894   PetscFunctionBegin;
4895   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4896   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4897   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4898   ierr = MatDestroy(&At);CHKERRQ(ierr);
4899   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4900   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4901   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4902   PetscFunctionReturn(0);
4903 }
4904 
4905 #undef __FUNCT__
4906 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4907 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4908 {
4909   PetscErrorCode ierr;
4910   PetscInt       m=A->rmap->n,n=B->cmap->n;
4911   Mat            Cmat;
4912 
4913   PetscFunctionBegin;
4914   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4915   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4916   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4917   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4918   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4919   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4920   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4921   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4922 
4923   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4924 
4925   *C = Cmat;
4926   PetscFunctionReturn(0);
4927 }
4928 
4929 /* ----------------------------------------------------------------*/
4930 #undef __FUNCT__
4931 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4932 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4933 {
4934   PetscErrorCode ierr;
4935 
4936   PetscFunctionBegin;
4937   if (scall == MAT_INITIAL_MATRIX) {
4938     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4939     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4940     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4941   }
4942   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4943   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4944   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 /*MC
4949    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4950 
4951    Options Database Keys:
4952 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4953 
4954   Level: beginner
4955 
4956 .seealso: MatCreateAIJ()
4957 M*/
4958 
4959 #undef __FUNCT__
4960 #define __FUNCT__ "MatCreate_MPIAIJ"
4961 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4962 {
4963   Mat_MPIAIJ     *b;
4964   PetscErrorCode ierr;
4965   PetscMPIInt    size;
4966 
4967   PetscFunctionBegin;
4968   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4969 
4970   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4971   B->data       = (void*)b;
4972   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4973   B->assembled  = PETSC_FALSE;
4974   B->insertmode = NOT_SET_VALUES;
4975   b->size       = size;
4976 
4977   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4978 
4979   /* build cache for off array entries formed */
4980   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4981 
4982   b->donotstash  = PETSC_FALSE;
4983   b->colmap      = 0;
4984   b->garray      = 0;
4985   b->roworiented = PETSC_TRUE;
4986 
4987   /* stuff used for matrix vector multiply */
4988   b->lvec  = NULL;
4989   b->Mvctx = NULL;
4990 
4991   /* stuff for MatGetRow() */
4992   b->rowindices   = 0;
4993   b->rowvalues    = 0;
4994   b->getrowactive = PETSC_FALSE;
4995 
4996   /* flexible pointer used in CUSP/CUSPARSE classes */
4997   b->spptr = NULL;
4998 
4999   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5000   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5001   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5009 #if defined(PETSC_HAVE_ELEMENTAL)
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5011 #endif
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5015   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5016   PetscFunctionReturn(0);
5017 }
5018 
5019 #undef __FUNCT__
5020 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5021 /*@C
5022      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5023          and "off-diagonal" part of the matrix in CSR format.
5024 
5025    Collective on MPI_Comm
5026 
5027    Input Parameters:
5028 +  comm - MPI communicator
5029 .  m - number of local rows (Cannot be PETSC_DECIDE)
5030 .  n - This value should be the same as the local size used in creating the
5031        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5032        calculated if N is given) For square matrices n is almost always m.
5033 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5034 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5035 .   i - row indices for "diagonal" portion of matrix
5036 .   j - column indices
5037 .   a - matrix values
5038 .   oi - row indices for "off-diagonal" portion of matrix
5039 .   oj - column indices
5040 -   oa - matrix values
5041 
5042    Output Parameter:
5043 .   mat - the matrix
5044 
5045    Level: advanced
5046 
5047    Notes:
5048        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5049        must free the arrays once the matrix has been destroyed and not before.
5050 
5051        The i and j indices are 0 based
5052 
5053        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5054 
5055        This sets local rows and cannot be used to set off-processor values.
5056 
5057        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5058        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5059        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5060        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5061        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5062        communication if it is known that only local entries will be set.
5063 
5064 .keywords: matrix, aij, compressed row, sparse, parallel
5065 
5066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5067           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5068 @*/
5069 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5070 {
5071   PetscErrorCode ierr;
5072   Mat_MPIAIJ     *maij;
5073 
5074   PetscFunctionBegin;
5075   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5076   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5077   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5078   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5079   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5080   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5081   maij = (Mat_MPIAIJ*) (*mat)->data;
5082 
5083   (*mat)->preallocated = PETSC_TRUE;
5084 
5085   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5086   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5087 
5088   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5089   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5090 
5091   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5092   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5093   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5094   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5095 
5096   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5097   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5098   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5099   PetscFunctionReturn(0);
5100 }
5101 
5102 /*
5103     Special version for direct calls from Fortran
5104 */
5105 #include <petsc/private/fortranimpl.h>
5106 
5107 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5108 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5109 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5110 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5111 #endif
5112 
5113 /* Change these macros so can be used in void function */
5114 #undef CHKERRQ
5115 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5116 #undef SETERRQ2
5117 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5118 #undef SETERRQ3
5119 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5120 #undef SETERRQ
5121 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5122 
5123 #undef __FUNCT__
5124 #define __FUNCT__ "matsetvaluesmpiaij_"
5125 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5126 {
5127   Mat            mat  = *mmat;
5128   PetscInt       m    = *mm, n = *mn;
5129   InsertMode     addv = *maddv;
5130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5131   PetscScalar    value;
5132   PetscErrorCode ierr;
5133 
5134   MatCheckPreallocated(mat,1);
5135   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5136 
5137 #if defined(PETSC_USE_DEBUG)
5138   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5139 #endif
5140   {
5141     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5142     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5143     PetscBool roworiented = aij->roworiented;
5144 
5145     /* Some Variables required in the macro */
5146     Mat        A                 = aij->A;
5147     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5148     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5149     MatScalar  *aa               = a->a;
5150     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5151     Mat        B                 = aij->B;
5152     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5153     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5154     MatScalar  *ba               = b->a;
5155 
5156     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5157     PetscInt  nonew = a->nonew;
5158     MatScalar *ap1,*ap2;
5159 
5160     PetscFunctionBegin;
5161     for (i=0; i<m; i++) {
5162       if (im[i] < 0) continue;
5163 #if defined(PETSC_USE_DEBUG)
5164       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5165 #endif
5166       if (im[i] >= rstart && im[i] < rend) {
5167         row      = im[i] - rstart;
5168         lastcol1 = -1;
5169         rp1      = aj + ai[row];
5170         ap1      = aa + ai[row];
5171         rmax1    = aimax[row];
5172         nrow1    = ailen[row];
5173         low1     = 0;
5174         high1    = nrow1;
5175         lastcol2 = -1;
5176         rp2      = bj + bi[row];
5177         ap2      = ba + bi[row];
5178         rmax2    = bimax[row];
5179         nrow2    = bilen[row];
5180         low2     = 0;
5181         high2    = nrow2;
5182 
5183         for (j=0; j<n; j++) {
5184           if (roworiented) value = v[i*n+j];
5185           else value = v[i+j*m];
5186           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5187           if (in[j] >= cstart && in[j] < cend) {
5188             col = in[j] - cstart;
5189             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5190           } else if (in[j] < 0) continue;
5191 #if defined(PETSC_USE_DEBUG)
5192           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5193 #endif
5194           else {
5195             if (mat->was_assembled) {
5196               if (!aij->colmap) {
5197                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5198               }
5199 #if defined(PETSC_USE_CTABLE)
5200               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5201               col--;
5202 #else
5203               col = aij->colmap[in[j]] - 1;
5204 #endif
5205               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5206                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5207                 col  =  in[j];
5208                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5209                 B     = aij->B;
5210                 b     = (Mat_SeqAIJ*)B->data;
5211                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5212                 rp2   = bj + bi[row];
5213                 ap2   = ba + bi[row];
5214                 rmax2 = bimax[row];
5215                 nrow2 = bilen[row];
5216                 low2  = 0;
5217                 high2 = nrow2;
5218                 bm    = aij->B->rmap->n;
5219                 ba    = b->a;
5220               }
5221             } else col = in[j];
5222             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5223           }
5224         }
5225       } else if (!aij->donotstash) {
5226         if (roworiented) {
5227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5228         } else {
5229           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5230         }
5231       }
5232     }
5233   }
5234   PetscFunctionReturnVoid();
5235 }
5236 
5237