xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f99b5f5956ad544412cd3beb01fbb1d5b7dfd047)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
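
   For example, a minimal sketch of creating and preallocating an AIJ matrix so that the same code
   runs on one or on many processes (comm, A, M, N, d_nz, and o_nz are illustrative placeholders;
   the type could equally be selected at run time with -mat_type aij and MatSetFromOptions()):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve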
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically
21    switches over to use inodes when enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
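
   For example, a minimal sketch (A is assumed to have already been created with MatCreate() and
   MatSetSizes(); d_nz and o_nz are illustrative per-row estimates):
.vb
   MatSetType(A,MATAIJCRL);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve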
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
62 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
63 {
64   PetscErrorCode  ierr;
65   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
66   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
67   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
68   const PetscInt  *ia,*ib;
69   const MatScalar *aa,*bb;
70   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
71   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
72 
73   PetscFunctionBegin;
74   *keptrows = 0;
75   ia        = a->i;
76   ib        = b->i;
77   for (i=0; i<m; i++) {
78     na = ia[i+1] - ia[i];
79     nb = ib[i+1] - ib[i];
80     if (!na && !nb) {
81       cnt++;
82       goto ok1;
83     }
84     aa = a->a + ia[i];
85     for (j=0; j<na; j++) {
86       if (aa[j] != 0.0) goto ok1;
87     }
88     bb = b->a + ib[i];
89     for (j=0; j <nb; j++) {
90       if (bb[j] != 0.0) goto ok1;
91     }
92     cnt++;
93 ok1:;
94   }
95   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
96   if (!n0rows) PetscFunctionReturn(0);
97   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
98   cnt  = 0;
99   for (i=0; i<m; i++) {
100     na = ia[i+1] - ia[i];
101     nb = ib[i+1] - ib[i];
102     if (!na && !nb) continue;
103     aa = a->a + ia[i];
104     for (j=0; j<na;j++) {
105       if (aa[j] != 0.0) {
106         rows[cnt++] = rstart + i;
107         goto ok2;
108       }
109     }
110     bb = b->a + ib[i];
111     for (j=0; j<nb; j++) {
112       if (bb[j] != 0.0) {
113         rows[cnt++] = rstart + i;
114         goto ok2;
115       }
116     }
117 ok2:;
118   }
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
125 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
126 {
127   PetscErrorCode    ierr;
128   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
129 
130   PetscFunctionBegin;
131   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
132     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
133   } else {
134     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
135   }
136   PetscFunctionReturn(0);
137 }
138 
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
142 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
143 {
144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
145   PetscErrorCode ierr;
146   PetscInt       i,rstart,nrows,*rows;
147 
148   PetscFunctionBegin;
149   *zrows = NULL;
150   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
151   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
152   for (i=0; i<nrows; i++) rows[i] += rstart;
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
159 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
160 {
161   PetscErrorCode ierr;
162   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
163   PetscInt       i,n,*garray = aij->garray;
164   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
165   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
166   PetscReal      *work;
167 
168   PetscFunctionBegin;
169   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
170   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
171   if (type == NORM_2) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
177     }
178   } else if (type == NORM_1) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
184     }
185   } else if (type == NORM_INFINITY) {
186     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
187       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
188     }
189     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
190       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
191     }
192 
193   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
194   if (type == NORM_INFINITY) {
195     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
196   } else {
197     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
198   }
199   ierr = PetscFree(work);CHKERRQ(ierr);
200   if (type == NORM_2) {
201     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNCT__
207 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
208 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
209 {
210   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
211   IS              sis,gis;
212   PetscErrorCode  ierr;
213   const PetscInt  *isis,*igis;
214   PetscInt        n,*iis,nsis,ngis,rstart,i;
215 
216   PetscFunctionBegin;
217   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
218   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
219   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
220   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
221   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
223 
224   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
225   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
226   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
227   n    = ngis + nsis;
228   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
230   for (i=0; i<n; i++) iis[i] += rstart;
231   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
232 
233   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
234   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
235   ierr = ISDestroy(&sis);CHKERRQ(ierr);
236   ierr = ISDestroy(&gis);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatDistribute_MPIAIJ"
242 /*
243     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
244     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
245 
246     Only for square matrices
247 
248     Used by a preconditioner, hence PETSC_EXTERN
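
    Example (a sketch): gmat is the MATSEQAIJ matrix, referenced only on process 0, m is the number of
    rows this process is to own, and dmat is the resulting distributed matrix (all names illustrative).
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
       ... change the numerical values of gmat on process 0 ...
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);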
249 */
250 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
251 {
252   PetscMPIInt    rank,size;
253   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
254   PetscErrorCode ierr;
255   Mat            mat;
256   Mat_SeqAIJ     *gmata;
257   PetscMPIInt    tag;
258   MPI_Status     status;
259   PetscBool      aij;
260   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
261 
262   PetscFunctionBegin;
263   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
264   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
265   if (!rank) {
266     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
267     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
268   }
269   if (reuse == MAT_INITIAL_MATRIX) {
270     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
271     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
272     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
273     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
274     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
275     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
276     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
277     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
278     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
279 
280     rowners[0] = 0;
281     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
282     rstart = rowners[rank];
283     rend   = rowners[rank+1];
284     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
285     if (!rank) {
286       gmata = (Mat_SeqAIJ*) gmat->data;
287       /* send row lengths to all processors */
288       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
289       for (i=1; i<size; i++) {
290         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
291       }
292       /* determine the number of diagonal and off-diagonal nonzeros in each row */
293       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
294       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
295       jj   = 0;
296       for (i=0; i<m; i++) {
297         for (j=0; j<dlens[i]; j++) {
298           if (gmata->j[jj] < rstart) ld[i]++;
299           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
300           jj++;
301         }
302       }
303       /* send column indices to other processes */
304       for (i=1; i<size; i++) {
305         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
306         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
307         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
308       }
309 
310       /* send numerical values to other processes */
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       gmataa = gmata->a;
316       gmataj = gmata->j;
317 
318     } else {
319       /* receive row lengths */
320       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
321       /* receive column indices */
322       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
323       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
325       /* determine the number of diagonal and off-diagonal nonzeros in each row */
326       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
327       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
328       jj   = 0;
329       for (i=0; i<m; i++) {
330         for (j=0; j<dlens[i]; j++) {
331           if (gmataj[jj] < rstart) ld[i]++;
332           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
333           jj++;
334         }
335       }
336       /* receive numerical values */
337       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
339     }
340     /* set preallocation */
341     for (i=0; i<m; i++) {
342       dlens[i] -= olens[i];
343     }
344     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
345     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
346 
347     for (i=0; i<m; i++) {
348       dlens[i] += olens[i];
349     }
350     cnt = 0;
351     for (i=0; i<m; i++) {
352       row  = rstart + i;
353       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
354       cnt += dlens[i];
355     }
356     if (rank) {
357       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
358     }
359     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
360     ierr = PetscFree(rowners);CHKERRQ(ierr);
361 
362     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
363 
364     *inmat = mat;
365   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
366     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
367     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
368     mat  = *inmat;
369     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
370     if (!rank) {
371       /* send numerical values to other processes */
372       gmata  = (Mat_SeqAIJ*) gmat->data;
373       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
374       gmataa = gmata->a;
375       for (i=1; i<size; i++) {
376         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
377         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
378       }
379       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
380     } else {
381       /* receive numerical values from process 0*/
382       nz   = Ad->nz + Ao->nz;
383       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
384       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
385     }
386     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
387     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
388     ad = Ad->a;
389     ao = Ao->a;
390     if (mat->rmap->n) {
391       i  = 0;
392       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
393       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
394     }
395     for (i=1; i<mat->rmap->n; i++) {
396       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
397       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
398     }
399     i--;
400     if (mat->rmap->n) {
401       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (rank) {
404       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
405     }
406   }
407   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
408   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 /*
413   Local utility routine that creates a mapping from the global column
414 number to the local number in the off-diagonal part of the local
415 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
416 a slightly higher hash table cost; without it, it is not scalable (each processor
417 has an order N integer array) but access is fast.
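
For example (a sketch mirroring the lookups in MatSetValues_MPIAIJ() below), the local off-diagonal
column lcol corresponding to a global column gcol is recovered as
    PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;      when PETSC_USE_CTABLE is defined
    lcol = aij->colmap[gcol] - 1;                           otherwise
where a result of -1 indicates that gcol does not occur in the off-diagonal part.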
418 */
419 #undef __FUNCT__
420 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
421 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
422 {
423   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
424   PetscErrorCode ierr;
425   PetscInt       n = aij->B->cmap->n,i;
426 
427   PetscFunctionBegin;
428   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
429 #if defined(PETSC_USE_CTABLE)
430   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
431   for (i=0; i<n; i++) {
432     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
433   }
434 #else
435   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
436   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
437   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
438 #endif
439   PetscFunctionReturn(0);
440 }
441 
442 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
443 { \
444     if (col <= lastcol1)  low1 = 0;     \
445     else                 high1 = nrow1; \
446     lastcol1 = col;\
447     while (high1-low1 > 5) { \
448       t = (low1+high1)/2; \
449       if (rp1[t] > col) high1 = t; \
450       else              low1  = t; \
451     } \
452       for (_i=low1; _i<high1; _i++) { \
453         if (rp1[_i] > col) break; \
454         if (rp1[_i] == col) { \
455           if (addv == ADD_VALUES) ap1[_i] += value;   \
456           else                    ap1[_i] = value; \
457           goto a_noinsert; \
458         } \
459       }  \
460       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
461       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
462       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
463       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
464       N = nrow1++ - 1; a->nz++; high1++; \
465       /* shift up all the later entries in this row */ \
466       for (ii=N; ii>=_i; ii--) { \
467         rp1[ii+1] = rp1[ii]; \
468         ap1[ii+1] = ap1[ii]; \
469       } \
470       rp1[_i] = col;  \
471       ap1[_i] = value;  \
472       A->nonzerostate++;\
473       a_noinsert: ; \
474       ailen[row] = nrow1; \
475 }
476 
477 
478 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
479   { \
480     if (col <= lastcol2) low2 = 0;                        \
481     else high2 = nrow2;                                   \
482     lastcol2 = col;                                       \
483     while (high2-low2 > 5) {                              \
484       t = (low2+high2)/2;                                 \
485       if (rp2[t] > col) high2 = t;                        \
486       else             low2  = t;                         \
487     }                                                     \
488     for (_i=low2; _i<high2; _i++) {                       \
489       if (rp2[_i] > col) break;                           \
490       if (rp2[_i] == col) {                               \
491         if (addv == ADD_VALUES) ap2[_i] += value;         \
492         else                    ap2[_i] = value;          \
493         goto b_noinsert;                                  \
494       }                                                   \
495     }                                                     \
496     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
497     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
498     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
499     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
500     N = nrow2++ - 1; b->nz++; high2++;                    \
501     /* shift up all the later entries in this row */      \
502     for (ii=N; ii>=_i; ii--) {                            \
503       rp2[ii+1] = rp2[ii];                                \
504       ap2[ii+1] = ap2[ii];                                \
505     }                                                     \
506     rp2[_i] = col;                                        \
507     ap2[_i] = value;                                      \
508     B->nonzerostate++;                                    \
509     b_noinsert: ;                                         \
510     bilen[row] = nrow2;                                   \
511   }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
515 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
516 {
517   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
518   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
519   PetscErrorCode ierr;
520   PetscInt       l,*garray = mat->garray,diag;
521 
522   PetscFunctionBegin;
523   /* code only works for square matrices A */
524 
525   /* find size of row to the left of the diagonal part */
526   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
527   row  = row - diag;
528   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
529     if (garray[b->j[b->i[row]+l]] > diag) break;
530   }
531   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
532 
533   /* diagonal part */
534   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
535 
536   /* right of diagonal part */
537   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatSetValues_MPIAIJ"
543 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
544 {
545   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
546   PetscScalar    value;
547   PetscErrorCode ierr;
548   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
549   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
550   PetscBool      roworiented = aij->roworiented;
551 
552   /* Some Variables required in the macro */
553   Mat        A                 = aij->A;
554   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
555   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
556   MatScalar  *aa               = a->a;
557   PetscBool  ignorezeroentries = a->ignorezeroentries;
558   Mat        B                 = aij->B;
559   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
560   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
561   MatScalar  *ba               = b->a;
562 
563   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
564   PetscInt  nonew;
565   MatScalar *ap1,*ap2;
566 
567   PetscFunctionBegin;
568   for (i=0; i<m; i++) {
569     if (im[i] < 0) continue;
570 #if defined(PETSC_USE_DEBUG)
571     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
572 #endif
573     if (im[i] >= rstart && im[i] < rend) {
574       row      = im[i] - rstart;
575       lastcol1 = -1;
576       rp1      = aj + ai[row];
577       ap1      = aa + ai[row];
578       rmax1    = aimax[row];
579       nrow1    = ailen[row];
580       low1     = 0;
581       high1    = nrow1;
582       lastcol2 = -1;
583       rp2      = bj + bi[row];
584       ap2      = ba + bi[row];
585       rmax2    = bimax[row];
586       nrow2    = bilen[row];
587       low2     = 0;
588       high2    = nrow2;
589 
590       for (j=0; j<n; j++) {
591         if (roworiented) value = v[i*n+j];
592         else             value = v[i+j*m];
593         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
594         if (in[j] >= cstart && in[j] < cend) {
595           col   = in[j] - cstart;
596           nonew = a->nonew;
597           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
598         } else if (in[j] < 0) continue;
599 #if defined(PETSC_USE_DEBUG)
600         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
601 #endif
602         else {
603           if (mat->was_assembled) {
604             if (!aij->colmap) {
605               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606             }
607 #if defined(PETSC_USE_CTABLE)
608             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
609             col--;
610 #else
611             col = aij->colmap[in[j]] - 1;
612 #endif
613             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
614               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
615               col  =  in[j];
616               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
617               B     = aij->B;
618               b     = (Mat_SeqAIJ*)B->data;
619               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
620               rp2   = bj + bi[row];
621               ap2   = ba + bi[row];
622               rmax2 = bimax[row];
623               nrow2 = bilen[row];
624               low2  = 0;
625               high2 = nrow2;
626               bm    = aij->B->rmap->n;
627               ba    = b->a;
628             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
629           } else col = in[j];
630           nonew = b->nonew;
631           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
632         }
633       }
634     } else {
635       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
636       if (!aij->donotstash) {
637         mat->assembled = PETSC_FALSE;
638         if (roworiented) {
639           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
640         } else {
641           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
642         }
643       }
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatGetValues_MPIAIJ"
651 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   PetscErrorCode ierr;
655   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
656   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
657 
658   PetscFunctionBegin;
659   for (i=0; i<m; i++) {
660     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
661     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
662     if (idxm[i] >= rstart && idxm[i] < rend) {
663       row = idxm[i] - rstart;
664       for (j=0; j<n; j++) {
665         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
666         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
667         if (idxn[j] >= cstart && idxn[j] < cend) {
668           col  = idxn[j] - cstart;
669           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
670         } else {
671           if (!aij->colmap) {
672             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
673           }
674 #if defined(PETSC_USE_CTABLE)
675           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
676           col--;
677 #else
678           col = aij->colmap[idxn[j]] - 1;
679 #endif
680           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
681           else {
682             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
683           }
684         }
685       }
686     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
695 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
696 {
697   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
698   PetscErrorCode ierr;
699   PetscInt       nstash,reallocs;
700 
701   PetscFunctionBegin;
702   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
703 
704   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
705   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
706   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
712 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
713 {
714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
715   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
716   PetscErrorCode ierr;
717   PetscMPIInt    n;
718   PetscInt       i,j,rstart,ncols,flg;
719   PetscInt       *row,*col;
720   PetscBool      other_disassembled;
721   PetscScalar    *val;
722 
723   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
724 
725   PetscFunctionBegin;
726   if (!aij->donotstash && !mat->nooffprocentries) {
727     while (1) {
728       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
729       if (!flg) break;
730 
731       for (i=0; i<n; ) {
732         /* Now identify the consecutive vals belonging to the same row */
733         for (j=i,rstart=row[j]; j<n; j++) {
734           if (row[j] != rstart) break;
735         }
736         if (j < n) ncols = j-i;
737         else       ncols = n-i;
738         /* Now assemble all these values with a single function call */
739         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
740 
741         i = j;
742       }
743     }
744     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
745   }
746   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
747   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
748 
749   /* determine if any processor has disassembled; if so, we must
750      also disassemble ourselves so that we may reassemble. */
751   /*
752      if the nonzero structure of the submatrix B cannot change then we know that
753      no processor disassembled, so we can skip this step
754   */
755   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
756     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
757     if (mat->was_assembled && !other_disassembled) {
758       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
759     }
760   }
761   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
762     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
763   }
764   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
765   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
766   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
767 
768   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
769 
770   aij->rowvalues = 0;
771 
772   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
773   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
774 
775   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
776   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
777     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
778     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
785 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
786 {
787   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
792   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatZeroRows_MPIAIJ"
798 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
799 {
800   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
801   PetscInt      *lrows;
802   PetscInt       r, len;
803   PetscErrorCode ierr;
804 
805   PetscFunctionBegin;
806   /* get locally owned rows */
807   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
808   /* fix right hand side if needed */
809   if (x && b) {
810     const PetscScalar *xx;
811     PetscScalar       *bb;
812 
813     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
814     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
815     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
816     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
817     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
818   }
819   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
820   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
822     PetscBool cong;
823     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
824     if (cong) A->congruentlayouts = 1;
825     else      A->congruentlayouts = 0;
826   }
827   if ((diag != 0.0) && A->congruentlayouts) {
828     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
829   } else if (diag != 0.0) {
830     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
831     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
832     for (r = 0; r < len; ++r) {
833       const PetscInt row = lrows[r] + A->rmap->rstart;
834       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
835     }
836     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
837     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   } else {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840   }
841   ierr = PetscFree(lrows);CHKERRQ(ierr);
842 
843   /* only change matrix nonzero state if pattern was allowed to be changed */
844   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
845     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
853 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
854 {
855   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
856   PetscErrorCode    ierr;
857   PetscMPIInt       n = A->rmap->n;
858   PetscInt          i,j,r,m,p = 0,len = 0;
859   PetscInt          *lrows,*owners = A->rmap->range;
860   PetscSFNode       *rrows;
861   PetscSF           sf;
862   const PetscScalar *xx;
863   PetscScalar       *bb,*mask;
864   Vec               xmask,lmask;
865   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
866   const PetscInt    *aj, *ii,*ridx;
867   PetscScalar       *aa;
868 
869   PetscFunctionBegin;
870   /* Create SF where leaves are input rows and roots are owned rows */
871   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
872   for (r = 0; r < n; ++r) lrows[r] = -1;
873   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
874   for (r = 0; r < N; ++r) {
875     const PetscInt idx   = rows[r];
876     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
877     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
878       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
879     }
880     rrows[r].rank  = p;
881     rrows[r].index = rows[r] - owners[p];
882   }
883   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
884   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
885   /* Collect flags for rows to be zeroed */
886   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
887   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
888   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
889   /* Compress and put in row numbers */
890   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
891   /* zero diagonal part of matrix */
892   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
893   /* handle off diagonal part of matrix */
894   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
895   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
896   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
897   for (i=0; i<len; i++) bb[lrows[i]] = 1;
898   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
899   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
902   if (x) {
903     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
906     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
907   }
908   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
909   /* remove zeroed rows of off diagonal matrix */
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
913   }
914   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij->a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij->a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x) {
951     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
952     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
953   }
954   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
955   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
956   ierr = PetscFree(lrows);CHKERRQ(ierr);
957 
958   /* only change matrix nonzero state if pattern was allowed to be changed */
959   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
960     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
961     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMult_MPIAIJ"
968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
969 {
970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
971   PetscErrorCode ierr;
972   PetscInt       nt;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
977   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultAdd_MPIAIJ"
998 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1013 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1016   PetscErrorCode ierr;
1017   PetscBool      merged;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1021   /* do nondiagonal part */
1022   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1023   if (!merged) {
1024     /* send it on its way */
1025     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1026     /* do local part */
1027     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1028     /* receive remote parts: note this assumes the values are not actually */
1029     /* added into yy until the next line */
1030     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1031   } else {
1032     /* do local part */
1033     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1034     /* send it on its way */
1035     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036     /* values actually were received in the Begin() but we need to call this nop */
1037     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1038   }
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1044 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1045 {
1046   MPI_Comm       comm;
1047   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1048   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1049   IS             Me,Notme;
1050   PetscErrorCode ierr;
1051   PetscInt       M,N,first,last,*notme,i;
1052   PetscMPIInt    size;
1053 
1054   PetscFunctionBegin;
1055   /* Easy test: the diagonal blocks must be transposes of each other */
1056   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1057   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1058   if (!*f) PetscFunctionReturn(0);
1059   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1061   if (size == 1) PetscFunctionReturn(0);
1062 
1063   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1064   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1065   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1066   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1067   for (i=0; i<first; i++) notme[i] = i;
1068   for (i=last; i<M; i++) notme[i-last+first] = i;
1069   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1070   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1071   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1072   Aoff = Aoffs[0];
1073   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1074   Boff = Boffs[0];
1075   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1076   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1077   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1078   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1079   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1080   ierr = PetscFree(notme);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1086 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092   /* do nondiagonal part */
1093   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1094   /* send it on its way */
1095   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1096   /* do local part */
1097   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1098   /* receive remote parts */
1099   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*
1104   This only works correctly for square matrices where the subblock A->A is the
1105    diagonal block
1106 */
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1110 {
1111   PetscErrorCode ierr;
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113 
1114   PetscFunctionBegin;
1115   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1116   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1117   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatScale_MPIAIJ"
1123 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1124 {
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1130   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatDestroy_MPIAIJ"
1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1137 {
1138   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142 #if defined(PETSC_USE_LOG)
1143   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1144 #endif
1145   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1146   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1147   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1148   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1149 #if defined(PETSC_USE_CTABLE)
1150   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1151 #else
1152   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1153 #endif
1154   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1156   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1157   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1158   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1159   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1160 
1161   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1169 #if defined(PETSC_HAVE_ELEMENTAL)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1171 #endif
1172 #if defined(PETSC_HAVE_HYPRE)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1174 #endif
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1180 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1181 {
1182   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1183   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1184   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1185   PetscErrorCode ierr;
1186   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1187   int            fd;
1188   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1189   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1190   PetscScalar    *column_values;
1191   PetscInt       message_count,flowcontrolcount;
1192   FILE           *file;
1193 
1194   PetscFunctionBegin;
1195   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1197   nz   = A->nz + B->nz;
1198   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1199   if (!rank) {
1200     header[0] = MAT_FILE_CLASSID;
1201     header[1] = mat->rmap->N;
1202     header[2] = mat->cmap->N;
1203 
1204     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1205     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     /* get largest number of rows any processor has */
1207     rlen  = mat->rmap->n;
1208     range = mat->rmap->range;
1209     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1210   } else {
1211     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     rlen = mat->rmap->n;
1213   }
1214 
1215   /* load up the local row counts */
1216   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1217   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1218 
1219   /* store the row lengths to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       rlen = range[i+1] - range[i];
1226       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230   } else {
1231     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1232     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1234   }
1235   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1236 
1237   /* load up the local column indices */
1238   nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
1239   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1241   cnt   = 0;
1242   for (i=0; i<mat->rmap->n; i++) {
1243     for (j=B->i[i]; j<B->i[i+1]; j++) {
1244       if ((col = garray[B->j[j]]) > cstart) break;
1245       column_indices[cnt++] = col;
1246     }
1247     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1248     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1249   }
1250   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251 
1252   /* store the column indices to the file */
1253   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1254   if (!rank) {
1255     MPI_Status status;
1256     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1257     for (i=1; i<size; i++) {
1258       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1259       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1260       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1261       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1267     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1272 
1273   /* load up the local column values */
1274   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1275   cnt  = 0;
1276   for (i=0; i<mat->rmap->n; i++) {
1277     for (j=B->i[i]; j<B->i[i+1]; j++) {
1278       if (garray[B->j[j]] > cstart) break;
1279       column_values[cnt++] = B->a[j];
1280     }
1281     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1282     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1283   }
1284   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1285 
1286   /* store the column values to the file */
1287   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1288   if (!rank) {
1289     MPI_Status status;
1290     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1291     for (i=1; i<size; i++) {
1292       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1293       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1294       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1295       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1297     }
1298     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1299   } else {
1300     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1301     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1304   }
1305   ierr = PetscFree(column_values);CHKERRQ(ierr);
1306 
1307   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1308   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #include <petscdraw.h>
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1315 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1316 {
1317   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1318   PetscErrorCode    ierr;
1319   PetscMPIInt       rank = aij->rank,size = aij->size;
1320   PetscBool         isdraw,iascii,isbinary;
1321   PetscViewer       sviewer;
1322   PetscViewerFormat format;
1323 
1324   PetscFunctionBegin;
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1327   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1328   if (iascii) {
1329     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1330     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1331       MatInfo   info;
1332       PetscInt  *inodes;
1333 
1334       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1335       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1336       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1337       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1338       if (!inodes) {
1339         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1340                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1341       } else {
1342         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1343                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1344       }
1345       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1346       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1347       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1348       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1349       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1352       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1353       PetscFunctionReturn(0);
1354     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1355       PetscInt inodecount,inodelimit,*inodes;
1356       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1357       if (inodes) {
1358         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1359       } else {
1360         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1361       }
1362       PetscFunctionReturn(0);
1363     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1364       PetscFunctionReturn(0);
1365     }
1366   } else if (isbinary) {
1367     if (size == 1) {
1368       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1369       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1370     } else {
1371       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1372     }
1373     PetscFunctionReturn(0);
1374   } else if (isdraw) {
1375     PetscDraw draw;
1376     PetscBool isnull;
1377     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1378     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1379     if (isnull) PetscFunctionReturn(0);
1380   }
1381 
1382   {
1383     /* assemble the entire matrix onto first processor. */
1384     Mat        A;
1385     Mat_SeqAIJ *Aloc;
1386     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1387     MatScalar  *a;
1388 
1389     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1390     if (!rank) {
1391       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1392     } else {
1393       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1394     }
1395     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1396     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1397     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1398     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1399     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1400 
1401     /* copy over the A part */
1402     Aloc = (Mat_SeqAIJ*)aij->A->data;
1403     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1404     row  = mat->rmap->rstart;
1405     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1406     for (i=0; i<m; i++) {
1407       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1408       row++;
1409       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1410     }
1411     aj = Aloc->j;
1412     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1413 
1414     /* copy over the B part */
1415     Aloc = (Mat_SeqAIJ*)aij->B->data;
1416     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1419     ct   = cols;
1420     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1421     for (i=0; i<m; i++) {
1422       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1423       row++;
1424       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425     }
1426     ierr = PetscFree(ct);CHKERRQ(ierr);
1427     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429     /*
1430        Every process has to make this call to draw the matrix since the graphics waits are
1431        synchronized across all processes that share the PetscDraw object
1432     */
1433     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1434     if (!rank) {
1435       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1436       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1437     }
1438     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1439     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1440     ierr = MatDestroy(&A);CHKERRQ(ierr);
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatView_MPIAIJ"
1447 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1448 {
1449   PetscErrorCode ierr;
1450   PetscBool      iascii,isdraw,issocket,isbinary;
1451 
1452   PetscFunctionBegin;
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1457   if (iascii || isdraw || isbinary || issocket) {
1458     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1459   }
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatSOR_MPIAIJ"
1465 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1466 {
1467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1468   PetscErrorCode ierr;
1469   Vec            bb1 = 0;
1470   PetscBool      hasop;
1471 
1472   PetscFunctionBegin;
1473   if (flag == SOR_APPLY_UPPER) {
1474     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1475     PetscFunctionReturn(0);
1476   }
1477 
1478   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1479     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1480   }
1481 
1482   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487 
1488     while (its--) {
1489       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491 
1492       /* update rhs: bb1 = bb - B*x */
1493       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1494       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1495 
1496       /* local sweep */
1497       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1498     }
1499   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1500     if (flag & SOR_ZERO_INITIAL_GUESS) {
1501       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1502       its--;
1503     }
1504     while (its--) {
1505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507 
1508       /* update rhs: bb1 = bb - B*x */
1509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1511 
1512       /* local sweep */
1513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1514     }
1515   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_EISENSTAT) {
1532     Vec xx1;
1533 
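    /* Eisenstat's trick: a local backward sweep produces xx; below, bb1 = bb + ((omega-2)/omega)*D*xx + B*xghost
       is formed, and a local forward sweep on bb1 yields a correction xx1 that is added to xx */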
1534     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1535     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1536 
1537     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539     if (!mat->diag) {
1540       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1541       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1542     }
1543     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1544     if (hasop) {
1545       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1546     } else {
1547       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1548     }
1549     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1550 
1551     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1552 
1553     /* local sweep */
1554     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1555     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1556     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1557   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1558 
1559   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1560 
1561   matin->factorerrortype = mat->A->factorerrortype;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatPermute_MPIAIJ"
1567 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1568 {
1569   Mat            aA,aB,Aperm;
1570   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1571   PetscScalar    *aa,*ba;
1572   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1573   PetscSF        rowsf,sf;
1574   IS             parcolp = NULL;
1575   PetscBool      done;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
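  /* Strategy: invert the row and column permutations with PetscSF reductions (so each process learns where its
     rows and columns end up), broadcast the new locations of the ghost columns, compute the diagonal and
     off-diagonal preallocation of the permuted matrix, then insert the permuted entries with MatSetValues() */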
1579   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1580   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1581   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1582   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1583 
1584   /* Invert row permutation to find out where my rows should go */
1585   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1586   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1587   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1589   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1590   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1591 
1592   /* Invert column permutation to find out where my columns should go */
1593   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1594   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1595   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1596   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1597   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1598   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1599   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1600 
1601   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1602   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1603   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1604 
1605   /* Find out where my gcols should go */
1606   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1607   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1608   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1609   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1610   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1611   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1612   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1613   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1614 
1615   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1616   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1617   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1618   for (i=0; i<m; i++) {
1619     PetscInt row = rdest[i],rowner;
1620     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1621     for (j=ai[i]; j<ai[i+1]; j++) {
1622       PetscInt cowner,col = cdest[aj[j]];
1623       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1624       if (rowner == cowner) dnnz[i]++;
1625       else onnz[i]++;
1626     }
1627     for (j=bi[i]; j<bi[i+1]; j++) {
1628       PetscInt cowner,col = gcdest[bj[j]];
1629       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1630       if (rowner == cowner) dnnz[i]++;
1631       else onnz[i]++;
1632     }
1633   }
1634   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1635   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1636   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1637   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1639 
1640   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1641   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1642   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1643   for (i=0; i<m; i++) {
1644     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1645     PetscInt j0,rowlen;
1646     rowlen = ai[i+1] - ai[i];
1647     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m (the work-array length), so insert in batches */
1648       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1649       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1650     }
1651     rowlen = bi[i+1] - bi[i];
1652     for (j0=j=0; j<rowlen; j0=j) {
1653       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1654       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1655     }
1656   }
1657   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1658   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1660   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1661   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1662   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1663   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1664   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1665   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1666   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1667   *B = Aperm;
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1673 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1674 {
1675   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1676   PetscErrorCode ierr;
1677 
1678   PetscFunctionBegin;
1679   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1680   if (ghosts) *ghosts = aij->garray;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1686 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1687 {
1688   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1689   Mat            A    = mat->A,B = mat->B;
1690   PetscErrorCode ierr;
1691   PetscReal      isend[5],irecv[5];
1692 
1693   PetscFunctionBegin;
1694   info->block_size = 1.0;
1695   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1696 
1697   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1698   isend[3] = info->memory;  isend[4] = info->mallocs;
1699 
1700   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1701 
1702   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1703   isend[3] += info->memory;  isend[4] += info->mallocs;
1704   if (flag == MAT_LOCAL) {
1705     info->nz_used      = isend[0];
1706     info->nz_allocated = isend[1];
1707     info->nz_unneeded  = isend[2];
1708     info->memory       = isend[3];
1709     info->mallocs      = isend[4];
1710   } else if (flag == MAT_GLOBAL_MAX) {
1711     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1712 
1713     info->nz_used      = irecv[0];
1714     info->nz_allocated = irecv[1];
1715     info->nz_unneeded  = irecv[2];
1716     info->memory       = irecv[3];
1717     info->mallocs      = irecv[4];
1718   } else if (flag == MAT_GLOBAL_SUM) {
1719     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1720 
1721     info->nz_used      = irecv[0];
1722     info->nz_allocated = irecv[1];
1723     info->nz_unneeded  = irecv[2];
1724     info->memory       = irecv[3];
1725     info->mallocs      = irecv[4];
1726   }
1727   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1728   info->fill_ratio_needed = 0;
1729   info->factor_mallocs    = 0;
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetOption_MPIAIJ"
1735 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1736 {
1737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   switch (op) {
1742   case MAT_NEW_NONZERO_LOCATIONS:
1743   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1744   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1745   case MAT_KEEP_NONZERO_PATTERN:
1746   case MAT_NEW_NONZERO_LOCATION_ERR:
1747   case MAT_USE_INODES:
1748   case MAT_IGNORE_ZERO_ENTRIES:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_ROW_ORIENTED:
1754     MatCheckPreallocated(A,1);
1755     a->roworiented = flg;
1756 
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_NEW_DIAGONALS:
1761     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1762     break;
1763   case MAT_IGNORE_OFF_PROC_ENTRIES:
1764     a->donotstash = flg;
1765     break;
1766   case MAT_SPD:
1767     A->spd_set = PETSC_TRUE;
1768     A->spd     = flg;
1769     if (flg) {
1770       A->symmetric                  = PETSC_TRUE;
1771       A->structurally_symmetric     = PETSC_TRUE;
1772       A->symmetric_set              = PETSC_TRUE;
1773       A->structurally_symmetric_set = PETSC_TRUE;
1774     }
1775     break;
1776   case MAT_SYMMETRIC:
1777     MatCheckPreallocated(A,1);
1778     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1779     break;
1780   case MAT_STRUCTURALLY_SYMMETRIC:
1781     MatCheckPreallocated(A,1);
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   case MAT_HERMITIAN:
1785     MatCheckPreallocated(A,1);
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_SYMMETRY_ETERNAL:
1789     MatCheckPreallocated(A,1);
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_SUBMAT_SINGLEIS:
1793     A->submat_singleis = flg;
1794     break;
1795   default:
1796     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1797   }
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatGetRow_MPIAIJ"
1803 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1804 {
1805   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1806   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1807   PetscErrorCode ierr;
1808   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1809   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1810   PetscInt       *cmap,*idx_p;
1811 
1812   PetscFunctionBegin;
1813   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1814   mat->getrowactive = PETSC_TRUE;
1815 
1816   if (!mat->rowvalues && (idx || v)) {
1817     /*
1818         allocate enough space to hold information from the longest row.
1819     */
1820     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1821     PetscInt   max = 1,tmp;
1822     for (i=0; i<matin->rmap->n; i++) {
1823       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1824       if (max < tmp) max = tmp;
1825     }
1826     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1827   }
1828 
1829   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1830   lrow = row - rstart;
1831 
1832   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1833   if (!v)   {pvA = 0; pvB = 0;}
1834   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1835   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1836   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1837   nztot = nzA + nzB;
1838 
1839   cmap = mat->garray;
1840   if (v  || idx) {
1841     if (nztot) {
1842       /* Sort by increasing column numbers, assuming A and B already sorted */
1843       PetscInt imark = -1;
1844       if (v) {
1845         *v = v_p = mat->rowvalues;
1846         for (i=0; i<nzB; i++) {
1847           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1848           else break;
1849         }
1850         imark = i;
1851         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1852         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1853       }
1854       if (idx) {
1855         *idx = idx_p = mat->rowindices;
1856         if (imark > -1) {
1857           for (i=0; i<imark; i++) {
1858             idx_p[i] = cmap[cworkB[i]];
1859           }
1860         } else {
1861           for (i=0; i<nzB; i++) {
1862             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1863             else break;
1864           }
1865           imark = i;
1866         }
1867         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1868         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1869       }
1870     } else {
1871       if (idx) *idx = 0;
1872       if (v)   *v   = 0;
1873     }
1874   }
1875   *nz  = nztot;
1876   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1877   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1883 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1884 {
1885   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1886 
1887   PetscFunctionBegin;
1888   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1889   aij->getrowactive = PETSC_FALSE;
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatNorm_MPIAIJ"
1895 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1896 {
1897   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1898   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1899   PetscErrorCode ierr;
1900   PetscInt       i,j,cstart = mat->cmap->rstart;
1901   PetscReal      sum = 0.0;
1902   MatScalar      *v;
1903 
1904   PetscFunctionBegin;
1905   if (aij->size == 1) {
1906     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1907   } else {
1908     if (type == NORM_FROBENIUS) {
1909       v = amat->a;
1910       for (i=0; i<amat->nz; i++) {
1911         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1912       }
1913       v = bmat->a;
1914       for (i=0; i<bmat->nz; i++) {
1915         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1916       }
1917       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1918       *norm = PetscSqrtReal(*norm);
1919       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1920     } else if (type == NORM_1) { /* max column norm */
1921       PetscReal *tmp,*tmp2;
1922       PetscInt  *jj,*garray = aij->garray;
1923       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1924       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1925       *norm = 0.0;
1926       v     = amat->a; jj = amat->j;
1927       for (j=0; j<amat->nz; j++) {
1928         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1929       }
1930       v = bmat->a; jj = bmat->j;
1931       for (j=0; j<bmat->nz; j++) {
1932         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1933       }
1934       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1935       for (j=0; j<mat->cmap->N; j++) {
1936         if (tmp2[j] > *norm) *norm = tmp2[j];
1937       }
1938       ierr = PetscFree(tmp);CHKERRQ(ierr);
1939       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1940       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1941     } else if (type == NORM_INFINITY) { /* max row norm */
1942       PetscReal ntemp = 0.0;
1943       for (j=0; j<aij->A->rmap->n; j++) {
1944         v   = amat->a + amat->i[j];
1945         sum = 0.0;
1946         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1947           sum += PetscAbsScalar(*v); v++;
1948         }
1949         v = bmat->a + bmat->i[j];
1950         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1951           sum += PetscAbsScalar(*v); v++;
1952         }
1953         if (sum > ntemp) ntemp = sum;
1954       }
1955       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1956       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1957     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatTranspose_MPIAIJ"
1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1965 {
1966   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1967   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1968   PetscErrorCode ierr;
1969   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1970   PetscInt       cstart = A->cmap->rstart,ncol;
1971   Mat            B;
1972   MatScalar      *array;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1976 
1977   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1978   ai = Aloc->i; aj = Aloc->j;
1979   bi = Bloc->i; bj = Bloc->j;
1980   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1981     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1982     PetscSFNode          *oloc;
1983     PETSC_UNUSED PetscSF sf;
1984 
1985     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1986     /* compute d_nnz for preallocation */
1987     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<ai[ma]; i++) {
1989       d_nnz[aj[i]]++;
1990       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1991     }
1992     /* compute local off-diagonal contributions */
1993     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1995     /* map those to global */
1996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1997     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1998     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1999     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2000     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2003 
2004     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2005     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2006     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2007     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2008     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2009     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2010   } else {
2011     B    = *matout;
2012     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2014   }
2015 
2016   /* copy over the A part */
2017   array = Aloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<ma; i++) {
2020     ncol = ai[i+1]-ai[i];
2021     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; aj += ncol;
2024   }
2025   aj = Aloc->j;
2026   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2027 
2028   /* copy over the B part */
2029   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2030   array = Bloc->a;
2031   row   = A->rmap->rstart;
2032   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2033   cols_tmp = cols;
2034   for (i=0; i<mb; i++) {
2035     ncol = bi[i+1]-bi[i];
2036     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2037     row++;
2038     array += ncol; cols_tmp += ncol;
2039   }
2040   ierr = PetscFree(cols);CHKERRQ(ierr);
2041 
2042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2045     *matout = B;
2046   } else {
2047     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2055 {
2056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2057   Mat            a    = aij->A,b = aij->B;
2058   PetscErrorCode ierr;
2059   PetscInt       s1,s2,s3;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2063   if (rr) {
2064     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2065     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2066     /* Overlap communication with computation. */
2067     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068   }
2069   if (ll) {
2070     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2071     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2072     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2073   }
2074   /* scale  the diagonal block */
2075   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2076 
2077   if (rr) {
2078     /* Do a scatter end and then right scale the off-diagonal block */
2079     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2088 {
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatEqual_MPIAIJ"
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCopy_MPIAIJ"
2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2125 
2126   PetscFunctionBegin;
2127   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2128   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2129     /* because of the column compression in the off-processor part of the matrix a->B,
2130        the number of columns in a->B and b->B may be different, hence we cannot call
2131        the MatCopy() directly on the two parts. If need be, we can provide a more
2132        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2133        then copying the submatrices */
2134     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2135   } else {
2136     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2137     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUp_MPIAIJ"
2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2145 {
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*
2154    Computes the number of nonzeros per row needed for preallocation when X and Y
2155    have different nonzero structure.
2156 */
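/*
   Illustrative example (not from the original source): if row i of X has global columns {1,4,7} and row i of Y
   has global columns {2,4}, the merged row has columns {1,2,4,7}, so nnz[i] = 4; the shared column 4 is
   counted only once.
*/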
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2160 {
2161   PetscInt       i,j,k,nzx,nzy;
2162 
2163   PetscFunctionBegin;
2164   /* Set the number of nonzeros in the new matrix */
2165   for (i=0; i<m; i++) {
2166     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2167     nzx = xi[i+1] - xi[i];
2168     nzy = yi[i+1] - yi[i];
2169     nnz[i] = 0;
2170     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2171       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2172       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2173       nnz[i]++;
2174     }
2175     for (; k<nzy; k++) nnz[i]++;
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscErrorCode ierr;
2186   PetscInt       m = Y->rmap->N;
2187   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2188   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2201   PetscBLASInt   bnz,one=1;
2202   Mat_SeqAIJ     *x,*y;
2203 
2204   PetscFunctionBegin;
2205   if (str == SAME_NONZERO_PATTERN) {
2206     PetscScalar alpha = a;
2207     x    = (Mat_SeqAIJ*)xx->A->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     y    = (Mat_SeqAIJ*)yy->A->data;
2210     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2211     x    = (Mat_SeqAIJ*)xx->B->data;
2212     y    = (Mat_SeqAIJ*)yy->B->data;
2213     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2214     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2215     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2216   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2217     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2218   } else {
2219     Mat      B;
2220     PetscInt *nnz_d,*nnz_o;
2221     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2222     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2223     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2224     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2225     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2226     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2227     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2230     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2231     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2232     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatConjugate_MPIAIJ"
2243 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2244 {
2245 #if defined(PETSC_USE_COMPLEX)
2246   PetscErrorCode ierr;
2247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2251   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2252 #else
2253   PetscFunctionBegin;
2254 #endif
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatRealPart_MPIAIJ"
2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2267   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2280   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2286 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290   PetscInt       i,*idxb = 0;
2291   PetscScalar    *va,*vb;
2292   Vec            vtmp;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2296   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2297   if (idx) {
2298     for (i=0; i<A->rmap->n; i++) {
2299       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2300     }
2301   }
2302 
2303   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2304   if (idx) {
2305     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2306   }
2307   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2308   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2309 
2310   for (i=0; i<A->rmap->n; i++) {
2311     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2312       va[i] = vb[i];
2313       if (idx) idx[i] = a->garray[idxb[i]];
2314     }
2315   }
2316 
2317   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2318   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2319   ierr = PetscFree(idxb);CHKERRQ(ierr);
2320   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2326 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2327 {
2328   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,*idxb = 0;
2331   PetscScalar    *va,*vb;
2332   Vec            vtmp;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2337   if (idx) {
2338     for (i=0; i<A->rmap->n; i++) {
2339       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2340     }
2341   }
2342 
2343   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2344   if (idx) {
2345     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2346   }
2347   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2348   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2349 
2350   for (i=0; i<A->rmap->n; i++) {
2351     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2352       va[i] = vb[i];
2353       if (idx) idx[i] = a->garray[idxb[i]];
2354     }
2355   }
2356 
2357   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2358   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2359   ierr = PetscFree(idxb);CHKERRQ(ierr);
2360   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2366 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2367 {
2368   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2369   PetscInt       n      = A->rmap->n;
2370   PetscInt       cstart = A->cmap->rstart;
2371   PetscInt       *cmap  = mat->garray;
2372   PetscInt       *diagIdx, *offdiagIdx;
2373   Vec            diagV, offdiagV;
2374   PetscScalar    *a, *diagA, *offdiagA;
2375   PetscInt       r;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2385   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2386   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2387   for (r = 0; r < n; ++r) {
2388     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2389       a[r]   = diagA[r];
2390       idx[r] = cstart + diagIdx[r];
2391     } else {
2392       a[r]   = offdiagA[r];
2393       idx[r] = cmap[offdiagIdx[r]];
2394     }
2395   }
2396   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2400   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2401   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2407 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2408 {
2409   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2410   PetscInt       n      = A->rmap->n;
2411   PetscInt       cstart = A->cmap->rstart;
2412   PetscInt       *cmap  = mat->garray;
2413   PetscInt       *diagIdx, *offdiagIdx;
2414   Vec            diagV, offdiagV;
2415   PetscScalar    *a, *diagA, *offdiagA;
2416   PetscInt       r;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2421   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2423   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2425   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2426   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2427   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2428   for (r = 0; r < n; ++r) {
2429     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2430       a[r]   = diagA[r];
2431       idx[r] = cstart + diagIdx[r];
2432     } else {
2433       a[r]   = offdiagA[r];
2434       idx[r] = cmap[offdiagIdx[r]];
2435     }
2436   }
2437   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2441   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2442   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2448 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2449 {
2450   PetscErrorCode ierr;
2451   Mat            *dummy;
2452 
2453   PetscFunctionBegin;
2454   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2455   *newmat = *dummy;
2456   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2462 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2463 {
2464   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2469   A->factorerrortype = a->A->factorerrortype;
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2475 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2476 {
2477   PetscErrorCode ierr;
2478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2479 
2480   PetscFunctionBegin;
2481   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2482   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2483   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 #undef __FUNCT__
2489 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2490 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2491 {
2492   PetscFunctionBegin;
2493   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2494   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2500 /*@
2501    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2502 
2503    Collective on Mat
2504 
2505    Input Parameters:
2506 +    A - the matrix
2507 -    sc - PETSC_TRUE to use the scalable algorithm (the default is PETSC_FALSE, i.e. the non-scalable algorithm)
2508 
2509  Level: advanced
2510 
2511 @*/
2512 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2513 {
2514   PetscErrorCode       ierr;
2515 
2516   PetscFunctionBegin;
2517   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
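
/* A minimal usage sketch (not part of the original source); assumes A is an assembled MATMPIAIJ matrix and
   nis, isarray, ov are the usual MatIncreaseOverlap() arguments:

      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatIncreaseOverlap(A,nis,isarray,ov);CHKERRQ(ierr);

   Subsequent MatIncreaseOverlap() calls on A then use the scalable algorithm; the same effect is available
   from the options database via -mat_increase_overlap_scalable. */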
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2524 {
2525   PetscErrorCode       ierr;
2526   PetscBool            sc = PETSC_FALSE,flg;
2527 
2528   PetscFunctionBegin;
2529   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2530   ierr = PetscObjectOptionsBegin((PetscObject)A);
2531     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2532     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2533     if (flg) {
2534       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2535     }
2536   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatShift_MPIAIJ"
2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2543 {
2544   PetscErrorCode ierr;
2545   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2546   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2547 
2548   PetscFunctionBegin;
2549   if (!Y->preallocated) {
2550     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2551   } else if (!aij->nz) {
2552     PetscInt nonew = aij->nonew;
2553     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2554     aij->nonew = nonew;
2555   }
2556   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2563 {
2564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2569   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2570   if (d) {
2571     PetscInt rstart;
2572     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2573     *d += rstart;
2574 
2575   }
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 
2580 /* -------------------------------------------------------------------*/
2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2582                                        MatGetRow_MPIAIJ,
2583                                        MatRestoreRow_MPIAIJ,
2584                                        MatMult_MPIAIJ,
2585                                 /* 4*/ MatMultAdd_MPIAIJ,
2586                                        MatMultTranspose_MPIAIJ,
2587                                        MatMultTransposeAdd_MPIAIJ,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                 /*10*/ 0,
2592                                        0,
2593                                        0,
2594                                        MatSOR_MPIAIJ,
2595                                        MatTranspose_MPIAIJ,
2596                                 /*15*/ MatGetInfo_MPIAIJ,
2597                                        MatEqual_MPIAIJ,
2598                                        MatGetDiagonal_MPIAIJ,
2599                                        MatDiagonalScale_MPIAIJ,
2600                                        MatNorm_MPIAIJ,
2601                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2602                                        MatAssemblyEnd_MPIAIJ,
2603                                        MatSetOption_MPIAIJ,
2604                                        MatZeroEntries_MPIAIJ,
2605                                 /*24*/ MatZeroRows_MPIAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*29*/ MatSetUp_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                        MatGetDiagonalBlock_MPIAIJ,
2614                                        0,
2615                                 /*34*/ MatDuplicate_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*39*/ MatAXPY_MPIAIJ,
2621                                        MatGetSubMatrices_MPIAIJ,
2622                                        MatIncreaseOverlap_MPIAIJ,
2623                                        MatGetValues_MPIAIJ,
2624                                        MatCopy_MPIAIJ,
2625                                 /*44*/ MatGetRowMax_MPIAIJ,
2626                                        MatScale_MPIAIJ,
2627                                        MatShift_MPIAIJ,
2628                                        MatDiagonalSet_MPIAIJ,
2629                                        MatZeroRowsColumns_MPIAIJ,
2630                                 /*49*/ MatSetRandom_MPIAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2636                                        0,
2637                                        MatSetUnfactored_MPIAIJ,
2638                                        MatPermute_MPIAIJ,
2639                                        0,
2640                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2641                                        MatDestroy_MPIAIJ,
2642                                        MatView_MPIAIJ,
2643                                        0,
2644                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2651                                        MatGetRowMinAbs_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*75*/ MatFDColoringApply_AIJ,
2657                                        MatSetFromOptions_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        MatFindZeroDiagonals_MPIAIJ,
2661                                 /*80*/ 0,
2662                                        0,
2663                                        0,
2664                                 /*83*/ MatLoad_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2671                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2672                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2673                                        MatPtAP_MPIAIJ_MPIAIJ,
2674                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2675                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*99*/ 0,
2681                                        0,
2682                                        0,
2683                                        MatConjugate_MPIAIJ,
2684                                        0,
2685                                 /*104*/MatSetValuesRow_MPIAIJ,
2686                                        MatRealPart_MPIAIJ,
2687                                        MatImaginaryPart_MPIAIJ,
2688                                        0,
2689                                        0,
2690                                 /*109*/0,
2691                                        0,
2692                                        MatGetRowMin_MPIAIJ,
2693                                        0,
2694                                        MatMissingDiagonal_MPIAIJ,
2695                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2696                                        0,
2697                                        MatGetGhosts_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                 /*119*/0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        MatGetMultiProcBlock_MPIAIJ,
2705                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2706                                        MatGetColumnNorms_MPIAIJ,
2707                                        MatInvertBlockDiagonal_MPIAIJ,
2708                                        0,
2709                                        MatGetSubMatricesMPI_MPIAIJ,
2710                                 /*129*/0,
2711                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2712                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2713                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2714                                        0,
2715                                 /*134*/0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*139*/MatSetBlockSizes_MPIAIJ,
2721                                        0,
2722                                        0,
2723                                        MatFDColoringSetUp_MPIXAIJ,
2724                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2725                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2726 };
2727 
2728 /* ----------------------------------------------------------------------------------------*/
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2732 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2733 {
2734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2735   PetscErrorCode ierr;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2739   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2745 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2758 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2759 {
2760   Mat_MPIAIJ     *b;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2766   b = (Mat_MPIAIJ*)B->data;
2767 
2768 #if defined(PETSC_USE_CTABLE)
2769   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2770 #else
2771   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2772 #endif
2773   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2774   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2775   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2776 
2777   /* Because the off-diagonal matrix B will have been resized we simply destroy it and create a new one each time */
2778   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2779   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2780   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2781   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2782   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2783   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2784 
2785   if (!B->preallocated) {
2786     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2787     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2788     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2791   }
2792 
2793   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2794   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2795   B->preallocated  = PETSC_TRUE;
2796   B->was_assembled = PETSC_FALSE;
2797   B->assembled     = PETSC_FALSE;
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2804 {
2805   Mat            mat;
2806   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   *newmat = 0;
2811   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2812   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2813   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2814   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2815   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2816   a       = (Mat_MPIAIJ*)mat->data;
2817 
2818   mat->factortype   = matin->factortype;
2819   mat->assembled    = PETSC_TRUE;
2820   mat->insertmode   = NOT_SET_VALUES;
2821   mat->preallocated = PETSC_TRUE;
2822 
2823   a->size         = oldmat->size;
2824   a->rank         = oldmat->rank;
2825   a->donotstash   = oldmat->donotstash;
2826   a->roworiented  = oldmat->roworiented;
2827   a->rowindices   = 0;
2828   a->rowvalues    = 0;
2829   a->getrowactive = PETSC_FALSE;
2830 
2831   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2833 
2834   if (oldmat->colmap) {
2835 #if defined(PETSC_USE_CTABLE)
2836     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2837 #else
2838     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2839     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2840     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2841 #endif
2842   } else a->colmap = 0;
2843   if (oldmat->garray) {
2844     PetscInt len;
2845     len  = oldmat->B->cmap->n;
2846     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2847     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2848     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2849   } else a->garray = 0;
2850 
2851   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2852   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2853   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2854   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2855   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2856   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2857   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2858   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2859   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2860   *newmat = mat;
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 
2865 
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatLoad_MPIAIJ"
2868 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2869 {
2870   PetscScalar    *vals,*svals;
2871   MPI_Comm       comm;
2872   PetscErrorCode ierr;
2873   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2874   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2875   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2876   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2877   PetscInt       cend,cstart,n,*rowners;
2878   int            fd;
2879   PetscInt       bs = newMat->rmap->bs;
2880 
2881   PetscFunctionBegin;
2882   /* force binary viewer to load .info file if it has not yet done so */
2883   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2884   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2887   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2888   if (!rank) {
2889     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2890     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2891     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2892   }
2893 
2894   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2895   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2896   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2897   if (bs < 0) bs = 1;
2898 
2899   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2900   M    = header[1]; N = header[2];
2901 
2902   /* If global sizes are set, check if they are consistent with those given in the file */
2903   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2904   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2905 
2906   /* determine ownership of all (block) rows */
2907   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2908   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2909   else m = newMat->rmap->n; /* Set by user */
2910 
2911   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2912   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2913 
2914   /* First process needs enough room for process with most rows */
2915   if (!rank) {
2916     mmax = rowners[1];
2917     for (i=2; i<=size; i++) {
2918       mmax = PetscMax(mmax, rowners[i]);
2919     }
2920   } else mmax = -1;             /* unused, but compilers complain */
2921 
2922   rowners[0] = 0;
2923   for (i=2; i<=size; i++) {
2924     rowners[i] += rowners[i-1];
2925   }
2926   rstart = rowners[rank];
2927   rend   = rowners[rank+1];
2928 
2929   /* distribute row lengths to all processors */
2930   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2931   if (!rank) {
2932     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2933     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2934     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2935     for (j=0; j<m; j++) {
2936       procsnz[0] += ourlens[j];
2937     }
2938     for (i=1; i<size; i++) {
2939       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2940       /* calculate the number of nonzeros on each processor */
2941       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2942         procsnz[i] += rowlengths[j];
2943       }
2944       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2945     }
2946     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2947   } else {
2948     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2949   }
2950 
2951   if (!rank) {
2952     /* determine max buffer needed and allocate it */
2953     maxnz = 0;
2954     for (i=0; i<size; i++) {
2955       maxnz = PetscMax(maxnz,procsnz[i]);
2956     }
2957     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2958 
2959     /* read in my part of the matrix column indices  */
2960     nz   = procsnz[0];
2961     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2962     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2963 
2964     /* read in everyone else's column indices and ship them off */
2965     for (i=1; i<size; i++) {
2966       nz   = procsnz[i];
2967       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2968       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2969     }
2970     ierr = PetscFree(cols);CHKERRQ(ierr);
2971   } else {
2972     /* determine buffer space needed for message */
2973     nz = 0;
2974     for (i=0; i<m; i++) {
2975       nz += ourlens[i];
2976     }
2977     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2978 
2979     /* receive message of column indices*/
2980     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2981   }
2982 
2983   /* determine column ownership if matrix is not square */
2984   if (N != M) {
2985     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2986     else n = newMat->cmap->n;
2987     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2988     cstart = cend - n;
2989   } else {
2990     cstart = rstart;
2991     cend   = rend;
2992     n      = cend - cstart;
2993   }
2994 
2995   /* loop over local rows, determining number of off diagonal entries */
2996   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2997   jj   = 0;
2998   for (i=0; i<m; i++) {
2999     for (j=0; j<ourlens[i]; j++) {
3000       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3001       jj++;
3002     }
3003   }
3004 
3005   for (i=0; i<m; i++) {
3006     ourlens[i] -= offlens[i];
3007   }
3008   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3009 
3010   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3011 
3012   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3013 
3014   for (i=0; i<m; i++) {
3015     ourlens[i] += offlens[i];
3016   }
3017 
3018   if (!rank) {
3019     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3020 
3021     /* read in my part of the matrix numerical values  */
3022     nz   = procsnz[0];
3023     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3024 
3025     /* insert into matrix */
3026     jj      = rstart;
3027     smycols = mycols;
3028     svals   = vals;
3029     for (i=0; i<m; i++) {
3030       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3031       smycols += ourlens[i];
3032       svals   += ourlens[i];
3033       jj++;
3034     }
3035 
3036     /* read in the other processes' values and ship them out */
3037     for (i=1; i<size; i++) {
3038       nz   = procsnz[i];
3039       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3040       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3041     }
3042     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3043   } else {
3044     /* receive numeric values */
3045     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3046 
3047     /* receive message of values*/
3048     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3049 
3050     /* insert into matrix */
3051     jj      = rstart;
3052     smycols = mycols;
3053     svals   = vals;
3054     for (i=0; i<m; i++) {
3055       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3056       smycols += ourlens[i];
3057       svals   += ourlens[i];
3058       jj++;
3059     }
3060   }
3061   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3062   ierr = PetscFree(vals);CHKERRQ(ierr);
3063   ierr = PetscFree(mycols);CHKERRQ(ierr);
3064   ierr = PetscFree(rowners);CHKERRQ(ierr);
3065   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3072 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3074 {
3075   PetscErrorCode ierr;
3076   IS             iscol_local;
3077   PetscInt       csize;
3078 
3079   PetscFunctionBegin;
3080   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3081   if (call == MAT_REUSE_MATRIX) {
3082     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3083     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3084   } else {
3085     /* check if we are grabbing all columns*/
3086     PetscBool    isstride;
3087     PetscMPIInt  lisstride = 0,gisstride;
3088     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3089     if (isstride) {
3090       PetscInt  start,len,mstart,mlen;
3091       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3092       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3093       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3094       if (mstart == start && mlen-mstart == len) lisstride = 1;
3095     }
3096     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3097     if (gisstride) {
3098       PetscInt N;
3099       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3100       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3101       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3102       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3103     } else {
3104       PetscInt cbs;
3105       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3106       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3107       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3108     }
3109   }
3110   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3111   if (call == MAT_INITIAL_MATRIX) {
3112     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3113     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3114   }
3115   PetscFunctionReturn(0);
3116 }
3117 
3118 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3119 #undef __FUNCT__
3120 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3121 /*
3122     Not great since it makes two copies of the submatrix: first a SeqAIJ
3123   submatrix on each process, and then the final result by concatenating the local matrices.
3124   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3125 
3126   Note: This requires a sequential iscol with all indices.
3127 */
3128 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3129 {
3130   PetscErrorCode ierr;
3131   PetscMPIInt    rank,size;
3132   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3133   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3134   PetscBool      allcolumns, colflag;
3135   Mat            M,Mreuse;
3136   MatScalar      *vwork,*aa;
3137   MPI_Comm       comm;
3138   Mat_SeqAIJ     *aij;
3139 
3140   PetscFunctionBegin;
3141   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3142   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3143   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3144 
3145   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3146   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3147   if (colflag && ncol == mat->cmap->N) {
3148     allcolumns = PETSC_TRUE;
3149     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3150   } else {
3151     allcolumns = PETSC_FALSE;
3152   }
3153   if (call ==  MAT_REUSE_MATRIX) {
3154     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3155     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3156     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3157   } else {
3158     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3159   }
3160 
3161   /*
3162       m - number of local rows
3163       n - number of columns (same on all processors)
3164       rstart - first row in new global matrix generated
3165   */
3166   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3167   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3168   if (call == MAT_INITIAL_MATRIX) {
3169     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3170     ii  = aij->i;
3171     jj  = aij->j;
3172 
3173     /*
3174         Determine the number of non-zeros in the diagonal and off-diagonal
3175         portions of the matrix in order to do correct preallocation
3176     */
3177 
3178     /* first get start and end of "diagonal" columns */
3179     if (csize == PETSC_DECIDE) {
3180       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3181       if (mglobal == n) { /* square matrix */
3182         nlocal = m;
3183       } else {
3184         nlocal = n/size + ((n % size) > rank);
3185       }
3186     } else {
3187       nlocal = csize;
3188     }
3189     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3190     rstart = rend - nlocal;
3191     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3192 
3193     /* next, compute all the lengths */
3194     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3195     olens = dlens + m;
3196     for (i=0; i<m; i++) {
3197       jend = ii[i+1] - ii[i];
3198       olen = 0;
3199       dlen = 0;
3200       for (j=0; j<jend; j++) {
3201         if (*jj < rstart || *jj >= rend) olen++;
3202         else dlen++;
3203         jj++;
3204       }
3205       olens[i] = olen;
3206       dlens[i] = dlen;
3207     }
3208     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3209     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3210     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3211     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3212     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3213     ierr = PetscFree(dlens);CHKERRQ(ierr);
3214   } else {
3215     PetscInt ml,nl;
3216 
3217     M    = *newmat;
3218     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3219     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3220     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3221     /*
3222          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3223        rather than the slower MatSetValues().
3224     */
3225     M->was_assembled = PETSC_TRUE;
3226     M->assembled     = PETSC_FALSE;
3227   }
3228   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3229   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3230   ii   = aij->i;
3231   jj   = aij->j;
3232   aa   = aij->a;
3233   for (i=0; i<m; i++) {
3234     row   = rstart + i;
3235     nz    = ii[i+1] - ii[i];
3236     cwork = jj;     jj += nz;
3237     vwork = aa;     aa += nz;
3238     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3239   }
3240 
3241   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3242   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3243   *newmat = M;
3244 
3245   /* save submatrix used in processor for next request */
3246   if (call ==  MAT_INITIAL_MATRIX) {
3247     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3248     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3249   }
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3255 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3256 {
3257   PetscInt       m,cstart, cend,j,nnz,i,d;
3258   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3259   const PetscInt *JJ;
3260   PetscScalar    *values;
3261   PetscErrorCode ierr;
3262 
3263   PetscFunctionBegin;
3264   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3265 
3266   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3267   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3268   m      = B->rmap->n;
3269   cstart = B->cmap->rstart;
3270   cend   = B->cmap->rend;
3271   rstart = B->rmap->rstart;
3272 
3273   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3274 
3275 #if defined(PETSC_USE_DEBUG)
3276   for (i=0; i<m; i++) {
3277     nnz = Ii[i+1]- Ii[i];
3278     JJ  = J + Ii[i];
3279     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3280     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3281     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3282   }
3283 #endif
3284 
3285   for (i=0; i<m; i++) {
3286     nnz     = Ii[i+1]- Ii[i];
3287     JJ      = J + Ii[i];
3288     nnz_max = PetscMax(nnz_max,nnz);
3289     d       = 0;
3290     for (j=0; j<nnz; j++) {
3291       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3292     }
3293     d_nnz[i] = d;
3294     o_nnz[i] = nnz - d;
3295   }
3296   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3297   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3298 
3299   if (v) values = (PetscScalar*)v;
3300   else {
3301     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3302   }
3303 
3304   for (i=0; i<m; i++) {
3305     ii   = i + rstart;
3306     nnz  = Ii[i+1]- Ii[i];
3307     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3308   }
3309   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3310   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311 
3312   if (!v) {
3313     ierr = PetscFree(values);CHKERRQ(ierr);
3314   }
3315   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3316   PetscFunctionReturn(0);
3317 }
3318 
3319 #undef __FUNCT__
3320 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3321 /*@
3322    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3323    (the default parallel PETSc format).
3324 
3325    Collective on MPI_Comm
3326 
3327    Input Parameters:
3328 +  B - the matrix
3329 .  i - the indices into j for the start of each local row (starts with zero)
3330 .  j - the column indices for each local row (starts with zero)
3331 -  v - optional values in the matrix
3332 
3333    Level: developer
3334 
3335    Notes:
3336        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3337      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3338      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3339 
3340        The i and j indices are 0 based, and the entries of i are offsets into the local j (and v) arrays.
3341 
3342        The format used for the sparse matrix input is equivalent to a
3343     row-major ordering, i.e., for the following matrix, the input data expected is
3344     as shown
3345 
3346 $        1 0 0
3347 $        2 0 3     P0
3348 $       -------
3349 $        4 5 6     P1
3350 $
3351 $     Process0 [P0]: rows_owned=[0,1]
3352 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3353 $        j =  {0,0,2}  [size = 3]
3354 $        v =  {1,2,3}  [size = 3]
3355 $
3356 $     Process1 [P1]: rows_owned=[2]
3357 $        i =  {0,3}    [size = nrow+1  = 1+1]
3358 $        j =  {0,1,2}  [size = 3]
3359 $        v =  {4,5,6}  [size = 3]
3360 
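     As an illustration, here is a minimal sketch of the calls process 0 of the example above
   might make (process 1 passes its own i, j, v arrays; error handling omitted):

.vb
     Mat               B;
     const PetscInt    i[] = {0,1,3}, j[] = {0,0,2};   /* the two rows owned by process 0 */
     const PetscScalar v[] = {1,2,3};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);                /* 2 local rows of the 3x3 global matrix */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
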
3361 .keywords: matrix, aij, compressed row, sparse, parallel
3362 
3363 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3364           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3365 @*/
3366 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3367 {
3368   PetscErrorCode ierr;
3369 
3370   PetscFunctionBegin;
3371   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3377 /*@C
3378    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3379    (the default parallel PETSc format).  For good matrix assembly performance
3380    the user should preallocate the matrix storage by setting the parameters
3381    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3382    performance can be increased by more than a factor of 50.
3383 
3384    Collective on MPI_Comm
3385 
3386    Input Parameters:
3387 +  B - the matrix
3388 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3389            (same value is used for all local rows)
3390 .  d_nnz - array containing the number of nonzeros in the various rows of the
3391            DIAGONAL portion of the local submatrix (possibly different for each row)
3392            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3393            The size of this array is equal to the number of local rows, i.e 'm'.
3394            For matrices that will be factored, you must leave room for (and set)
3395            the diagonal entry even if it is zero.
3396 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3397            submatrix (same value is used for all local rows).
3398 -  o_nnz - array containing the number of nonzeros in the various rows of the
3399            OFF-DIAGONAL portion of the local submatrix (possibly different for
3400            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3401            structure. The size of this array is equal to the number
3402            of local rows, i.e 'm'.
3403 
3404    If the *_nnz parameter is given then the *_nz parameter is ignored
3405 
3406    The AIJ format (also called the Yale sparse matrix format or
3407    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3408    storage.  The stored row and column indices begin with zero.
3409    See Users-Manual: ch_mat for details.
3410 
3411    The parallel matrix is partitioned such that the first m0 rows belong to
3412    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3413    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3414 
3415    The DIAGONAL portion of the local submatrix of a processor can be defined
3416    as the submatrix which is obtained by extracting the part corresponding to
3417    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3418    first row that belongs to the processor, r2 is the last row belonging to
3419    this processor, and c1-c2 is the range of indices of the local part of a
3420    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3421    common case of a square matrix, the row and column ranges are the same and
3422    the DIAGONAL part is also square. The remaining portion of the local
3423    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3424 
3425    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3426 
3427    You can call MatGetInfo() to get information on how effective the preallocation was;
3428    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3429    You can also run with the option -info and look for messages with the string
3430    malloc in them to see if additional memory allocation was needed.
3431 
3432    Example usage:
3433 
3434    Consider the following 8x8 matrix with 34 non-zero values, that is
3435    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3436    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3437    as follows:
3438 
3439 .vb
3440             1  2  0  |  0  3  0  |  0  4
3441     Proc0   0  5  6  |  7  0  0  |  8  0
3442             9  0 10  | 11  0  0  | 12  0
3443     -------------------------------------
3444            13  0 14  | 15 16 17  |  0  0
3445     Proc1   0 18  0  | 19 20 21  |  0  0
3446             0  0  0  | 22 23  0  | 24  0
3447     -------------------------------------
3448     Proc2  25 26 27  |  0  0 28  | 29  0
3449            30  0  0  | 31 32 33  |  0 34
3450 .ve
3451 
3452    This can be represented as a collection of submatrices as:
3453 
3454 .vb
3455       A B C
3456       D E F
3457       G H I
3458 .ve
3459 
3460    Where the submatrices A,B,C are owned by proc0, D,E,F are
3461    owned by proc1, G,H,I are owned by proc2.
3462 
3463    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3464    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3465    The 'M','N' parameters are 8,8, and have the same values on all procs.
3466 
3467    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3468    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3469    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3470    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3471    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3472    matrix, and [DF] as another SeqAIJ matrix.
3473 
3474    When d_nz, o_nz parameters are specified, d_nz storage elements are
3475    allocated for every row of the local diagonal submatrix, and o_nz
3476    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3477    One way to choose d_nz and o_nz is to use the max nonzeros per local
3478    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3479    In this case, the values of d_nz,o_nz are:
3480 .vb
3481      proc0 : dnz = 2, o_nz = 2
3482      proc1 : dnz = 3, o_nz = 2
3483      proc2 : dnz = 1, o_nz = 4
3484 .ve
3485    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3486    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3487    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3488    34 values.
3489 
3490    When d_nnz, o_nnz parameters are specified, the storage is specified
3491    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3492    In the above case the values for d_nnz,o_nnz are:
3493 .vb
3494      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3495      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3496      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3497 .ve
3498    Here the space allocated is the sum of all the above values, i.e., 34, and
3499    hence pre-allocation is perfect.
3500 
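   As an illustration, here is a minimal sketch of the calls process 0 of the example above
   might make (the other processes pass their own local sizes and nnz arrays; error handling omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* rows owned by process 0 of the 8x8 example */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                          /* 3 local rows and columns on process 0 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
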
3501    Level: intermediate
3502 
3503 .keywords: matrix, aij, compressed row, sparse, parallel
3504 
3505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3506           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3507 @*/
3508 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3509 {
3510   PetscErrorCode ierr;
3511 
3512   PetscFunctionBegin;
3513   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3514   PetscValidType(B,1);
3515   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3521 /*@
3522      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3523          CSR format the local rows.
3524 
3525    Collective on MPI_Comm
3526 
3527    Input Parameters:
3528 +  comm - MPI communicator
3529 .  m - number of local rows (Cannot be PETSC_DECIDE)
3530 .  n - This value should be the same as the local size used in creating the
3531        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3532        calculated if N is given) For square matrices n is almost always m.
3533 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3534 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3535 .   i - row indices
3536 .   j - column indices
3537 -   a - matrix values
3538 
3539    Output Parameter:
3540 .   mat - the matrix
3541 
3542    Level: intermediate
3543 
3544    Notes:
3545        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3546      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3547      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3548 
3549        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
3550 
3551        The format used for the sparse matrix input is equivalent to a
3552     row-major ordering, i.e., for the following matrix, the input data expected is
3553     as shown
3554 
3555 $        1 0 0
3556 $        2 0 3     P0
3557 $       -------
3558 $        4 5 6     P1
3559 $
3560 $     Process0 [P0]: rows_owned=[0,1]
3561 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3562 $        j =  {0,0,2}  [size = 3]
3563 $        v =  {1,2,3}  [size = 3]
3564 $
3565 $     Process1 [P1]: rows_owned=[2]
3566 $        i =  {0,3}    [size = nrow+1  = 1+1]
3567 $        j =  {0,1,2}  [size = 3]
3568 $        v =  {4,5,6}  [size = 3]
3569 
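     As an illustration, here is a minimal sketch of the call process 0 of the example above
   might make (process 1 passes its own m, i, j, a arrays; error handling omitted):

.vb
     Mat               A;
     const PetscInt    i[] = {0,1,3}, j[] = {0,0,2};   /* the two rows owned by process 0 */
     const PetscScalar a[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
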
3570 .keywords: matrix, aij, compressed row, sparse, parallel
3571 
3572 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3573           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3574 @*/
3575 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3576 {
3577   PetscErrorCode ierr;
3578 
3579   PetscFunctionBegin;
3580   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3581   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3582   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3583   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3584   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3585   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3586   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3587   PetscFunctionReturn(0);
3588 }
3589 
3590 #undef __FUNCT__
3591 #define __FUNCT__ "MatCreateAIJ"
3592 /*@C
3593    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3594    (the default parallel PETSc format).  For good matrix assembly performance
3595    the user should preallocate the matrix storage by setting the parameters
3596    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3597    performance can be increased by more than a factor of 50.
3598 
3599    Collective on MPI_Comm
3600 
3601    Input Parameters:
3602 +  comm - MPI communicator
3603 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3604            This value should be the same as the local size used in creating the
3605            y vector for the matrix-vector product y = Ax.
3606 .  n - This value should be the same as the local size used in creating the
3607        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3608        calculated if N is given) For square matrices n is almost always m.
3609 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3610 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3611 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3612            (same value is used for all local rows)
3613 .  d_nnz - array containing the number of nonzeros in the various rows of the
3614            DIAGONAL portion of the local submatrix (possibly different for each row)
3615            or NULL, if d_nz is used to specify the nonzero structure.
3616            The size of this array is equal to the number of local rows, i.e 'm'.
3617 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3618            submatrix (same value is used for all local rows).
3619 -  o_nnz - array containing the number of nonzeros in the various rows of the
3620            OFF-DIAGONAL portion of the local submatrix (possibly different for
3621            each row) or NULL, if o_nz is used to specify the nonzero
3622            structure. The size of this array is equal to the number
3623            of local rows, i.e 'm'.
3624 
3625    Output Parameter:
3626 .  A - the matrix
3627 
3628    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3629    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3630    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3631 
3632    Notes:
3633    If the *_nnz parameter is given then the *_nz parameter is ignored
3634 
3635    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3636    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3637    storage requirements for this matrix.
3638 
3639    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3640    processor then it must be used on all processors that share the object for
3641    that argument.
3642 
3643    The user MUST specify either the local or global matrix dimensions
3644    (possibly both).
3645 
3646    The parallel matrix is partitioned across processors such that the
3647    first m0 rows belong to process 0, the next m1 rows belong to
3648    process 1, the next m2 rows belong to process 2 etc.. where
3649    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3650    values corresponding to an [m x N] submatrix.
3651 
3652    The columns are logically partitioned with the n0 columns belonging
3653    to 0th partition, the next n1 columns belonging to the next
3654    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3655 
3656    The DIAGONAL portion of the local submatrix on any given processor
3657    is the submatrix corresponding to the rows and columns m,n
3658    corresponding to the given processor, i.e., the diagonal matrix on
3659    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3660    etc. The remaining portion of the local submatrix [m x (N-n)]
3661    constitute the OFF-DIAGONAL portion. The example below better
3662    illustrates this concept.
3663 
3664    For a square global matrix we define each processor's diagonal portion
3665    to be its local rows and the corresponding columns (a square submatrix);
3666    each processor's off-diagonal portion encompasses the remainder of the
3667    local matrix (a rectangular submatrix).
3668 
3669    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3670 
3671    When calling this routine with a single process communicator, a matrix of
3672    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3673    type of communicator, use the construction mechanism:
3674      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3675 
3676    By default, this format uses inodes (identical nodes) when possible.
3677    We search for consecutive rows with the same nonzero structure, thereby
3678    reusing matrix information to achieve increased efficiency.
3679 
3680    Options Database Keys:
3681 +  -mat_no_inode  - Do not use inodes
3682 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3683 -  -mat_aij_oneindex - Internally use indexing starting at 1
3684         rather than 0.  Note that when calling MatSetValues(),
3685         the user still MUST index entries starting at 0!
3686 
3687 
3688    Example usage:
3689 
3690    Consider the following 8x8 matrix with 34 non-zero values, that is
3691    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3692    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3693    as follows:
3694 
3695 .vb
3696             1  2  0  |  0  3  0  |  0  4
3697     Proc0   0  5  6  |  7  0  0  |  8  0
3698             9  0 10  | 11  0  0  | 12  0
3699     -------------------------------------
3700            13  0 14  | 15 16 17  |  0  0
3701     Proc1   0 18  0  | 19 20 21  |  0  0
3702             0  0  0  | 22 23  0  | 24  0
3703     -------------------------------------
3704     Proc2  25 26 27  |  0  0 28  | 29  0
3705            30  0  0  | 31 32 33  |  0 34
3706 .ve
3707 
3708    This can be represented as a collection of submatrices as:
3709 
3710 .vb
3711       A B C
3712       D E F
3713       G H I
3714 .ve
3715 
3716    Where the submatrices A,B,C are owned by proc0, D,E,F are
3717    owned by proc1, G,H,I are owned by proc2.
3718 
3719    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3720    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3721    The 'M','N' parameters are 8,8, and have the same values on all procs.
3722 
3723    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3724    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3725    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3726    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3727    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3728    matrix, and [DF] as another SeqAIJ matrix.
3729 
3730    When d_nz, o_nz parameters are specified, d_nz storage elements are
3731    allocated for every row of the local diagonal submatrix, and o_nz
3732    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3733    One way to choose d_nz and o_nz is to use the max nonzeros per local
3734    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3735    In this case, the values of d_nz,o_nz are:
3736 .vb
3737      proc0 : dnz = 2, o_nz = 2
3738      proc1 : dnz = 3, o_nz = 2
3739      proc2 : dnz = 1, o_nz = 4
3740 .ve
3741    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3742    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3743    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3744    34 values.
3745 
3746    When d_nnz, o_nnz parameters are specified, the storage is specified
3747    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3748    In the above case the values for d_nnz,o_nnz are:
3749 .vb
3750      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3751      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3752      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3753 .ve
3754    Here the space allocated is the sum of all the above values, i.e., 34, and
3755    hence pre-allocation is perfect.
3756 
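   As an illustration, a minimal sketch of the call process 0 of the example above might make,
   using the simpler d_nz/o_nz form (the other processes pass their own local sizes and counts;
   error handling omitted):

.vb
     Mat A;

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);   /* d_nz = 2, o_nz = 2 on process 0 */
.ve
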
3757    Level: intermediate
3758 
3759 .keywords: matrix, aij, compressed row, sparse, parallel
3760 
3761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3762           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3763 @*/
3764 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3765 {
3766   PetscErrorCode ierr;
3767   PetscMPIInt    size;
3768 
3769   PetscFunctionBegin;
3770   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3771   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3772   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3773   if (size > 1) {
3774     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3775     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3776   } else {
3777     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3778     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3779   }
3780   PetscFunctionReturn(0);
3781 }
3782 
3783 #undef __FUNCT__
3784 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3785 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3786 {
3787   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3788   PetscBool      flg;
3789   PetscErrorCode ierr;
3790 
3791   PetscFunctionBegin;
3792   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3793   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3794   if (Ad)     *Ad     = a->A;
3795   if (Ao)     *Ao     = a->B;
3796   if (colmap) *colmap = a->garray;
3797   PetscFunctionReturn(0);
3798 }
3799 
3800 #undef __FUNCT__
3801 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3802 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3803 {
3804   PetscErrorCode ierr;
3805   PetscInt       m,N,i,rstart,nnz,Ii;
3806   PetscInt       *indx;
3807   PetscScalar    *values;
3808 
3809   PetscFunctionBegin;
3810   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3811   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3812     PetscInt       *dnz,*onz,sum,bs,cbs;
3813 
3814     if (n == PETSC_DECIDE) {
3815       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3816     }
3817     /* Check sum(n) = N */
3818     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3819     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3820 
3821     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3822     rstart -= m;
3823 
3824     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3825     for (i=0; i<m; i++) {
3826       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3827       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3828       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3829     }
3830 
3831     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3832     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3833     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3834     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3835     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3836     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3837     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3838   }
3839 
3840   /* numeric phase */
3841   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3842   for (i=0; i<m; i++) {
3843     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3844     Ii   = i + rstart;
3845     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3846     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3847   }
3848   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3849   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3850   PetscFunctionReturn(0);
3851 }
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatFileSplit"
3855 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3856 {
3857   PetscErrorCode    ierr;
3858   PetscMPIInt       rank;
3859   PetscInt          m,N,i,rstart,nnz;
3860   size_t            len;
3861   const PetscInt    *indx;
3862   PetscViewer       out;
3863   char              *name;
3864   Mat               B;
3865   const PetscScalar *values;
3866 
3867   PetscFunctionBegin;
3868   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3869   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3870   /* Should this be the type of the diagonal block of A? */
3871   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3872   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3873   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3874   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3875   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3876   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3877   for (i=0; i<m; i++) {
3878     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3879     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3880     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3881   }
3882   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884 
3885   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3886   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3887   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* room for ".<rank>" even for large ranks */
3888   sprintf(name,"%s.%d",outfile,rank);
3889   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3890   ierr = PetscFree(name);CHKERRQ(ierr);
3891   ierr = MatView(B,out);CHKERRQ(ierr);
3892   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3893   ierr = MatDestroy(&B);CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3900 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3901 {
3902   PetscErrorCode      ierr;
3903   Mat_Merge_SeqsToMPI *merge;
3904   PetscContainer      container;
3905 
3906   PetscFunctionBegin;
3907   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3908   if (container) {
3909     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3910     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3911     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3912     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3916     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3917     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3918     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3919     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3920     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3921     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3922     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3923     ierr = PetscFree(merge);CHKERRQ(ierr);
3924     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3925   }
3926   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3927   PetscFunctionReturn(0);
3928 }
3929 
3930 #include <../src/mat/utils/freespace.h>
3931 #include <petscbt.h>
3932 
3933 #undef __FUNCT__
3934 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3935 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3936 {
3937   PetscErrorCode      ierr;
3938   MPI_Comm            comm;
3939   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3940   PetscMPIInt         size,rank,taga,*len_s;
3941   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3942   PetscInt            proc,m;
3943   PetscInt            **buf_ri,**buf_rj;
3944   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3945   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3946   MPI_Request         *s_waits,*r_waits;
3947   MPI_Status          *status;
3948   MatScalar           *aa=a->a;
3949   MatScalar           **abuf_r,*ba_i;
3950   Mat_Merge_SeqsToMPI *merge;
3951   PetscContainer      container;
3952 
3953   PetscFunctionBegin;
3954   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3955   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3956 
3957   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3958   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3959 
3960   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3961   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3962 
3963   bi     = merge->bi;
3964   bj     = merge->bj;
3965   buf_ri = merge->buf_ri;
3966   buf_rj = merge->buf_rj;
3967 
3968   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3969   owners = merge->rowmap->range;
3970   len_s  = merge->len_s;
3971 
3972   /* send and recv matrix values */
3973   /*-----------------------------*/
3974   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3975   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3976 
3977   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3978   for (proc=0,k=0; proc<size; proc++) {
3979     if (!len_s[proc]) continue;
3980     i    = owners[proc];
3981     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3982     k++;
3983   }
3984 
3985   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3986   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3987   ierr = PetscFree(status);CHKERRQ(ierr);
3988 
3989   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3990   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3991 
3992   /* insert mat values of mpimat */
3993   /*----------------------------*/
3994   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3995   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3996 
3997   for (k=0; k<merge->nrecv; k++) {
3998     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3999     nrows       = *(buf_ri_k[k]);
4000     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4001     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4002   }
4003 
4004   /* set values of ba */
4005   m = merge->rowmap->n;
4006   for (i=0; i<m; i++) {
4007     arow = owners[rank] + i;
4008     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4009     bnzi = bi[i+1] - bi[i];
4010     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4011 
4012     /* add local non-zero vals of this proc's seqmat into ba */
4013     anzi   = ai[arow+1] - ai[arow];
4014     aj     = a->j + ai[arow];
4015     aa     = a->a + ai[arow];
4016     nextaj = 0;
4017     for (j=0; nextaj<anzi; j++) {
4018       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4019         ba_i[j] += aa[nextaj++];
4020       }
4021     }
4022 
4023     /* add received vals into ba */
4024     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4025       /* i-th row */
4026       if (i == *nextrow[k]) {
4027         anzi   = *(nextai[k]+1) - *nextai[k];
4028         aj     = buf_rj[k] + *(nextai[k]);
4029         aa     = abuf_r[k] + *(nextai[k]);
4030         nextaj = 0;
4031         for (j=0; nextaj<anzi; j++) {
4032           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4033             ba_i[j] += aa[nextaj++];
4034           }
4035         }
4036         nextrow[k]++; nextai[k]++;
4037       }
4038     }
4039     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4040   }
4041   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4042   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4043 
4044   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4045   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4046   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4047   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4048   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4049   PetscFunctionReturn(0);
4050 }
4051 
4052 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4053 
4054 #undef __FUNCT__
4055 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4056 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4057 {
4058   PetscErrorCode      ierr;
4059   Mat                 B_mpi;
4060   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4061   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4062   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4063   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4064   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4065   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4066   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4067   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4068   MPI_Status          *status;
4069   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4070   PetscBT             lnkbt;
4071   Mat_Merge_SeqsToMPI *merge;
4072   PetscContainer      container;
4073 
4074   PetscFunctionBegin;
4075   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4076 
4077   /* make sure it is a PETSc comm */
4078   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4080   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4081 
4082   ierr = PetscNew(&merge);CHKERRQ(ierr);
4083   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4084 
4085   /* determine row ownership */
4086   /*---------------------------------------------------------*/
4087   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4088   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4089   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4090   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4091   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4092   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4093   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4094 
4095   m      = merge->rowmap->n;
4096   owners = merge->rowmap->range;
4097 
4098   /* determine the number of messages to send, their lengths */
4099   /*---------------------------------------------------------*/
4100   len_s = merge->len_s;
4101 
4102   len          = 0; /* length of buf_si[] */
4103   merge->nsend = 0;
4104   for (proc=0; proc<size; proc++) {
4105     len_si[proc] = 0;
4106     if (proc == rank) {
4107       len_s[proc] = 0;
4108     } else {
4109       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4110       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4111     }
4112     if (len_s[proc]) {
4113       merge->nsend++;
4114       nrows = 0;
4115       for (i=owners[proc]; i<owners[proc+1]; i++) {
4116         if (ai[i+1] > ai[i]) nrows++;
4117       }
4118       len_si[proc] = 2*(nrows+1);
4119       len         += len_si[proc];
4120     }
4121   }
4122 
4123   /* determine the number and length of messages to receive for ij-structure */
4124   /*-------------------------------------------------------------------------*/
4125   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4126   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4127 
4128   /* post the Irecv of j-structure */
4129   /*-------------------------------*/
4130   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4131   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4132 
4133   /* post the Isend of j-structure */
4134   /*--------------------------------*/
4135   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4136 
4137   for (proc=0, k=0; proc<size; proc++) {
4138     if (!len_s[proc]) continue;
4139     i    = owners[proc];
4140     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4141     k++;
4142   }
4143 
4144   /* receives and sends of j-structure are complete */
4145   /*------------------------------------------------*/
4146   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4147   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4148 
4149   /* send and recv i-structure */
4150   /*---------------------------*/
4151   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4152   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4153 
4154   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4155   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4156   for (proc=0,k=0; proc<size; proc++) {
4157     if (!len_s[proc]) continue;
4158     /* form outgoing message for i-structure:
4159          buf_si[0]:                 nrows to be sent
4160                [1:nrows]:           row index (local to the destination process)
4161                [nrows+1:2*nrows+1]: i-structure index
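               e.g. (illustrative, not from the original source): two nonempty rows with local
               indices r0,r1 and 3 and 2 nonzeros respectively give buf_si = {2, r0, r1, 0, 3, 5}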
4162     */
4163     /*-------------------------------------------*/
4164     nrows       = len_si[proc]/2 - 1;
4165     buf_si_i    = buf_si + nrows+1;
4166     buf_si[0]   = nrows;
4167     buf_si_i[0] = 0;
4168     nrows       = 0;
4169     for (i=owners[proc]; i<owners[proc+1]; i++) {
4170       anzi = ai[i+1] - ai[i];
4171       if (anzi) {
4172         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4173         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4174         nrows++;
4175       }
4176     }
4177     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4178     k++;
4179     buf_si += len_si[proc];
4180   }
4181 
4182   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4183   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4184 
4185   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4186   for (i=0; i<merge->nrecv; i++) {
4187     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4188   }
4189 
4190   ierr = PetscFree(len_si);CHKERRQ(ierr);
4191   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4192   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4193   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4194   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4195   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4196   ierr = PetscFree(status);CHKERRQ(ierr);
4197 
4198   /* compute a local seq matrix in each processor */
4199   /*----------------------------------------------*/
4200   /* allocate bi array and free space for accumulating nonzero column info */
4201   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4202   bi[0] = 0;
4203 
4204   /* create and initialize a linked list */
4205   nlnk = N+1;
4206   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4207 
4208   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4209   len  = ai[owners[rank+1]] - ai[owners[rank]];
4210   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4211 
4212   current_space = free_space;
4213 
4214   /* determine symbolic info for each local row */
4215   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4216 
4217   for (k=0; k<merge->nrecv; k++) {
4218     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4219     nrows       = *buf_ri_k[k];
4220     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4221     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4222   }
4223 
4224   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4225   len  = 0;
4226   for (i=0; i<m; i++) {
4227     bnzi = 0;
4228     /* add local non-zero cols of this proc's seqmat into lnk */
4229     arow  = owners[rank] + i;
4230     anzi  = ai[arow+1] - ai[arow];
4231     aj    = a->j + ai[arow];
4232     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4233     bnzi += nlnk;
4234     /* add received col data into lnk */
4235     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4236       if (i == *nextrow[k]) { /* i-th row */
4237         anzi  = *(nextai[k]+1) - *nextai[k];
4238         aj    = buf_rj[k] + *nextai[k];
4239         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4240         bnzi += nlnk;
4241         nextrow[k]++; nextai[k]++;
4242       }
4243     }
4244     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4245 
4246     /* if free space is not available, make more free space */
4247     if (current_space->local_remaining<bnzi) {
4248       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4249       nspacedouble++;
4250     }
4251     /* copy data into free space, then initialize lnk */
4252     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4253     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4254 
4255     current_space->array           += bnzi;
4256     current_space->local_used      += bnzi;
4257     current_space->local_remaining -= bnzi;
4258 
4259     bi[i+1] = bi[i] + bnzi;
4260   }
4261 
4262   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4263 
4264   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4265   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4266   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4267 
4268   /* create symbolic parallel matrix B_mpi */
4269   /*---------------------------------------*/
4270   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4271   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4272   if (n==PETSC_DECIDE) {
4273     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4274   } else {
4275     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4276   }
4277   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4278   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4279   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4280   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4281   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4282 
4283   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4284   B_mpi->assembled    = PETSC_FALSE;
4285   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4286   merge->bi           = bi;
4287   merge->bj           = bj;
4288   merge->buf_ri       = buf_ri;
4289   merge->buf_rj       = buf_rj;
4290   merge->coi          = NULL;
4291   merge->coj          = NULL;
4292   merge->owners_co    = NULL;
4293 
4294   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4295 
4296   /* attach the supporting struct to B_mpi for reuse */
4297   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4298   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4299   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4300   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4301   *mpimat = B_mpi;
4302 
4303   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4309 /*@C
4310       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4311                  matrices from each processor
4312 
4313     Collective on MPI_Comm
4314 
4315    Input Parameters:
4316 +    comm - the communicator the parallel matrix will live on
4317 .    seqmat - the input sequential matrix
4318 .    m - number of local rows (or PETSC_DECIDE)
4319 .    n - number of local columns (or PETSC_DECIDE)
4320 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4321 
4322    Output Parameter:
4323 .    mpimat - the parallel matrix generated
4324 
4325     Level: advanced
4326 
4327    Notes:
4328      The dimensions of the sequential matrix on each process MUST be the same.
4329      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4330      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
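
     A hedged usage sketch (M and N are assumed to be the global sizes; the entries inserted are application specific):
.vb
     Mat seqmat,mpimat;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,PETSC_DEFAULT,NULL,&seqmat);CHKERRQ(ierr);
     /* ... each process inserts its contribution with MatSetValues() ... */
     ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve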
4331 @*/
4332 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4333 {
4334   PetscErrorCode ierr;
4335   PetscMPIInt    size;
4336 
4337   PetscFunctionBegin;
4338   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4339   if (size == 1) {
4340     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4341     if (scall == MAT_INITIAL_MATRIX) {
4342       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4343     } else {
4344       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4345     }
4346     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4347     PetscFunctionReturn(0);
4348   }
4349   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4350   if (scall == MAT_INITIAL_MATRIX) {
4351     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4352   }
4353   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4354   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4360 /*@
4361      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4362           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4363           with MatGetSize().
4364 
4365     Not Collective
4366 
4367    Input Parameters:
4368 +    A - the matrix
4369 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4370 
4371    Output Parameter:
4372 .    A_loc - the local sequential matrix generated
4373 
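    Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... work with the local rows of A through A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
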
4374     Level: developer
4375 
4376 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4377 
4378 @*/
4379 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4380 {
4381   PetscErrorCode ierr;
4382   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4383   Mat_SeqAIJ     *mat,*a,*b;
4384   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4385   MatScalar      *aa,*ba,*cam;
4386   PetscScalar    *ca;
4387   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4388   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4389   PetscBool      match;
4390   MPI_Comm       comm;
4391   PetscMPIInt    size;
4392 
4393   PetscFunctionBegin;
4394   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4395   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4396   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4398   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4399 
4400   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4401   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4402   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4403   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4404   aa = a->a; ba = b->a;
4405   if (scall == MAT_INITIAL_MATRIX) {
4406     if (size == 1) {
4407       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4408       PetscFunctionReturn(0);
4409     }
4410 
4411     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4412     ci[0] = 0;
4413     for (i=0; i<am; i++) {
4414       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4415     }
4416     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4417     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4418     k    = 0;
4419     for (i=0; i<am; i++) {
4420       ncols_o = bi[i+1] - bi[i];
4421       ncols_d = ai[i+1] - ai[i];
4422       /* off-diagonal portion of A */
4423       for (jo=0; jo<ncols_o; jo++) {
4424         col = cmap[*bj];
4425         if (col >= cstart) break;
4426         cj[k]   = col; bj++;
4427         ca[k++] = *ba++;
4428       }
4429       /* diagonal portion of A */
4430       for (j=0; j<ncols_d; j++) {
4431         cj[k]   = cstart + *aj++;
4432         ca[k++] = *aa++;
4433       }
4434       /* off-diagonal portion of A */
4435       for (j=jo; j<ncols_o; j++) {
4436         cj[k]   = cmap[*bj++];
4437         ca[k++] = *ba++;
4438       }
4439     }
4440     /* put together the new matrix */
4441     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4442     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4443     /* Since these are PETSc arrays, change flags to free them as necessary. */
4444     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4445     mat->free_a  = PETSC_TRUE;
4446     mat->free_ij = PETSC_TRUE;
4447     mat->nonew   = 0;
4448   } else if (scall == MAT_REUSE_MATRIX) {
4449     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4450     ci = mat->i; cj = mat->j; cam = mat->a;
4451     for (i=0; i<am; i++) {
4452       /* off-diagonal portion of A */
4453       ncols_o = bi[i+1] - bi[i];
4454       for (jo=0; jo<ncols_o; jo++) {
4455         col = cmap[*bj];
4456         if (col >= cstart) break;
4457         *cam++ = *ba++; bj++;
4458       }
4459       /* diagonal portion of A */
4460       ncols_d = ai[i+1] - ai[i];
4461       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4462       /* off-diagonal portion of A */
4463       for (j=jo; j<ncols_o; j++) {
4464         *cam++ = *ba++; bj++;
4465       }
4466     }
4467   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4468   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 #undef __FUNCT__
4473 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4474 /*@C
4475      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4476 
4477     Not Collective
4478 
4479    Input Parameters:
4480 +    A - the matrix
4481 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4482 -    row, col - index sets of rows and columns to extract (or NULL)
4483 
4484    Output Parameter:
4485 .    A_loc - the local sequential matrix generated
4486 
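    Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix and the default row and column selection is used):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... work with A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
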
4487     Level: developer
4488 
4489 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4490 
4491 @*/
4492 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4493 {
4494   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4495   PetscErrorCode ierr;
4496   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4497   IS             isrowa,iscola;
4498   Mat            *aloc;
4499   PetscBool      match;
4500 
4501   PetscFunctionBegin;
4502   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4503   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4504   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4505   if (!row) {
4506     start = A->rmap->rstart; end = A->rmap->rend;
4507     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4508   } else {
4509     isrowa = *row;
4510   }
4511   if (!col) {
4512     start = A->cmap->rstart;
4513     cmap  = a->garray;
4514     nzA   = a->A->cmap->n;
4515     nzB   = a->B->cmap->n;
4516     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4517     ncols = 0;
4518     for (i=0; i<nzB; i++) {
4519       if (cmap[i] < start) idx[ncols++] = cmap[i];
4520       else break;
4521     }
4522     imark = i;
4523     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4524     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4525     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4526   } else {
4527     iscola = *col;
4528   }
4529   if (scall != MAT_INITIAL_MATRIX) {
4530     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4531     aloc[0] = *A_loc;
4532   }
4533   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4534   *A_loc = aloc[0];
4535   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4536   if (!row) {
4537     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4538   }
4539   if (!col) {
4540     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4541   }
4542   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4543   PetscFunctionReturn(0);
4544 }
4545 
4546 #undef __FUNCT__
4547 #define __FUNCT__ "MatGetBrowsOfAcols"
4548 /*@C
4549     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4550 
4551     Collective on Mat
4552 
4553    Input Parameters:
4554 +    A,B - the matrices in mpiaij format
4555 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4556 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4557 
4558    Output Parameter:
4559 +    rowb, colb - index sets of rows and columns of B to extract
4560 -    B_seq - the sequential matrix generated
4561 
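    A usage sketch (A and B are assumed to be assembled MATMPIAIJ matrices); the index sets returned by the initial call must be kept and passed back for MAT_REUSE_MATRIX:
.vb
     Mat B_seq;
     IS  rowb = NULL,colb = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... B changes numerically but keeps its nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
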
4562     Level: developer
4563 
4564 @*/
4565 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4566 {
4567   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4568   PetscErrorCode ierr;
4569   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4570   IS             isrowb,iscolb;
4571   Mat            *bseq=NULL;
4572 
4573   PetscFunctionBegin;
4574   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4575     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4576   }
4577   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4578 
4579   if (scall == MAT_INITIAL_MATRIX) {
4580     start = A->cmap->rstart;
4581     cmap  = a->garray;
4582     nzA   = a->A->cmap->n;
4583     nzB   = a->B->cmap->n;
4584     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4585     ncols = 0;
4586     for (i=0; i<nzB; i++) {  /* row < local row index */
4587       if (cmap[i] < start) idx[ncols++] = cmap[i];
4588       else break;
4589     }
4590     imark = i;
4591     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4592     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4593     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4594     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4595   } else {
4596     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4597     isrowb  = *rowb; iscolb = *colb;
4598     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4599     bseq[0] = *B_seq;
4600   }
4601   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4602   *B_seq = bseq[0];
4603   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4604   if (!rowb) {
4605     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4606   } else {
4607     *rowb = isrowb;
4608   }
4609   if (!colb) {
4610     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4611   } else {
4612     *colb = iscolb;
4613   }
4614   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4615   PetscFunctionReturn(0);
4616 }
4617 
4618 #undef __FUNCT__
4619 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4620 /*
4621     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4622     of the OFF-DIAGONAL portion of the local part of A
4623 
4624     Collective on Mat
4625 
4626    Input Parameters:
4627 +    A,B - the matrices in mpiaij format
4628 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4629 
4630    Output Parameter:
4631 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4632 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4633 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4634 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4635 
4636     Level: developer
4637 
4638 */
4639 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4640 {
4641   VecScatter_MPI_General *gen_to,*gen_from;
4642   PetscErrorCode         ierr;
4643   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4644   Mat_SeqAIJ             *b_oth;
4645   VecScatter             ctx =a->Mvctx;
4646   MPI_Comm               comm;
4647   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4648   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4649   PetscScalar            *rvalues,*svalues;
4650   MatScalar              *b_otha,*bufa,*bufA;
4651   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4652   MPI_Request            *rwaits = NULL,*swaits = NULL;
4653   MPI_Status             *sstatus,rstatus;
4654   PetscMPIInt            jj,size;
4655   PetscInt               *cols,sbs,rbs;
4656   PetscScalar            *vals;
4657 
4658   PetscFunctionBegin;
4659   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4661 
4662   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4663     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4664   }
4665   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4666   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4667 
4668   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4669   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4670   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
4671   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
4672   nrecvs   = gen_from->n;
4673   nsends   = gen_to->n;
4674 
4675   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4676   srow    = gen_to->indices;    /* local row index to be sent */
4677   sstarts = gen_to->starts;
4678   sprocs  = gen_to->procs;
4679   sstatus = gen_to->sstatus;
4680   sbs     = gen_to->bs;
4681   rstarts = gen_from->starts;
4682   rprocs  = gen_from->procs;
4683   rbs     = gen_from->bs;
4684 
4685   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4686   if (scall == MAT_INITIAL_MATRIX) {
4687     /* i-array */
4688     /*---------*/
4689     /*  post receives */
4690     for (i=0; i<nrecvs; i++) {
4691       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4692       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4693       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4694     }
4695 
4696     /* pack the outgoing message */
4697     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4698 
4699     sstartsj[0] = 0;
4700     rstartsj[0] = 0;
4701     len         = 0; /* total length of j or a array to be sent */
4702     k           = 0;
4703     for (i=0; i<nsends; i++) {
4704       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4705       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4706       for (j=0; j<nrows; j++) {
4707         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4708         for (l=0; l<sbs; l++) {
4709           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4710 
4711           rowlen[j*sbs+l] = ncols;
4712 
4713           len += ncols;
4714           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4715         }
4716         k++;
4717       }
4718       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4719 
4720       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4721     }
4722     /* recvs and sends of i-array are completed */
4723     i = nrecvs;
4724     while (i--) {
4725       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4726     }
4727     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4728 
4729     /* allocate buffers for sending j and a arrays */
4730     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4731     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4732 
4733     /* create i-array of B_oth */
4734     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4735 
4736     b_othi[0] = 0;
4737     len       = 0; /* total length of j or a array to be received */
4738     k         = 0;
4739     for (i=0; i<nrecvs; i++) {
4740       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4741       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4742       for (j=0; j<nrows; j++) {
4743         b_othi[k+1] = b_othi[k] + rowlen[j];
4744         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4745         k++;
4746       }
4747       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4748     }
4749 
4750     /* allocate space for the j and a arrays of B_oth */
4751     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4752     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4753 
4754     /* j-array */
4755     /*---------*/
4756     /*  post receives of j-array */
4757     for (i=0; i<nrecvs; i++) {
4758       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4759       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4760     }
4761 
4762     /* pack the outgoing message j-array */
4763     k = 0;
4764     for (i=0; i<nsends; i++) {
4765       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4766       bufJ  = bufj+sstartsj[i];
4767       for (j=0; j<nrows; j++) {
4768         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4769         for (ll=0; ll<sbs; ll++) {
4770           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4771           for (l=0; l<ncols; l++) {
4772             *bufJ++ = cols[l];
4773           }
4774           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4775         }
4776       }
4777       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4778     }
4779 
4780     /* recvs and sends of j-array are completed */
4781     i = nrecvs;
4782     while (i--) {
4783       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4784     }
4785     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4786   } else if (scall == MAT_REUSE_MATRIX) {
4787     sstartsj = *startsj_s;
4788     rstartsj = *startsj_r;
4789     bufa     = *bufa_ptr;
4790     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4791     b_otha   = b_oth->a;
4792   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value");
4793 
4794   /* a-array */
4795   /*---------*/
4796   /*  post receives of a-array */
4797   for (i=0; i<nrecvs; i++) {
4798     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4799     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4800   }
4801 
4802   /* pack the outgoing message a-array */
4803   k = 0;
4804   for (i=0; i<nsends; i++) {
4805     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4806     bufA  = bufa+sstartsj[i];
4807     for (j=0; j<nrows; j++) {
4808       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4809       for (ll=0; ll<sbs; ll++) {
4810         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4811         for (l=0; l<ncols; l++) {
4812           *bufA++ = vals[l];
4813         }
4814         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4815       }
4816     }
4817     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4818   }
4819   /* recvs and sends of a-array are completed */
4820   i = nrecvs;
4821   while (i--) {
4822     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4823   }
4824   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4825   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4826 
4827   if (scall == MAT_INITIAL_MATRIX) {
4828     /* put together the new matrix */
4829     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4830 
4831     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4832     /* Since these are PETSc arrays, change flags to free them as necessary. */
4833     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4834     b_oth->free_a  = PETSC_TRUE;
4835     b_oth->free_ij = PETSC_TRUE;
4836     b_oth->nonew   = 0;
4837 
4838     ierr = PetscFree(bufj);CHKERRQ(ierr);
4839     if (!startsj_s || !bufa_ptr) {
4840       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4841       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4842     } else {
4843       *startsj_s = sstartsj;
4844       *startsj_r = rstartsj;
4845       *bufa_ptr  = bufa;
4846     }
4847   }
4848   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4849   PetscFunctionReturn(0);
4850 }
4851 
4852 #undef __FUNCT__
4853 #define __FUNCT__ "MatGetCommunicationStructs"
4854 /*@C
4855   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4856 
4857   Not Collective
4858 
4859   Input Parameters:
4860 . A - The matrix in mpiaij format
4861 
4862   Output Parameters:
4863 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4864 . colmap - A map from global column index to local index into lvec
4865 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4866 
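  Example usage (a minimal sketch; A is assumed to be a MATMPIAIJ matrix and the colmap declaration follows the PETSC_USE_CTABLE configuration):
.vb
     Vec        lvec;
     VecScatter mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
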
4867   Level: developer
4868 
4869 @*/
4870 #if defined(PETSC_USE_CTABLE)
4871 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4872 #else
4873 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4874 #endif
4875 {
4876   Mat_MPIAIJ *a;
4877 
4878   PetscFunctionBegin;
4879   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4880   PetscValidPointer(lvec, 2);
4881   PetscValidPointer(colmap, 3);
4882   PetscValidPointer(multScatter, 4);
4883   a = (Mat_MPIAIJ*) A->data;
4884   if (lvec) *lvec = a->lvec;
4885   if (colmap) *colmap = a->colmap;
4886   if (multScatter) *multScatter = a->Mvctx;
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4892 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4893 #if defined(PETSC_HAVE_ELEMENTAL)
4894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4895 #endif
4896 #if defined(PETSC_HAVE_HYPRE)
4897 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4898 #endif
4899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
4900 
4901 #undef __FUNCT__
4902 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4903 /*
4904     Computes C = (B'*A')', since computing the product A*B directly is untenable
4905 
4906                n                       p                          p
4907         (              )       (              )         (                  )
4908       m (      A       )  *  n (       B      )   =   m (         C        )
4909         (              )       (              )         (                  )
4910 
4911 */
4912 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4913 {
4914   PetscErrorCode ierr;
4915   Mat            At,Bt,Ct;
4916 
4917   PetscFunctionBegin;
4918   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4919   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4920   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4921   ierr = MatDestroy(&At);CHKERRQ(ierr);
4922   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4923   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4924   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4930 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4931 {
4932   PetscErrorCode ierr;
4933   PetscInt       m=A->rmap->n,n=B->cmap->n;
4934   Mat            Cmat;
4935 
4936   PetscFunctionBegin;
4937   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4938   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4939   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4940   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4941   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4942   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4943   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4944   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4945 
4946   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4947 
4948   *C = Cmat;
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 /* ----------------------------------------------------------------*/
4953 #undef __FUNCT__
4954 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4955 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4956 {
4957   PetscErrorCode ierr;
4958 
4959   PetscFunctionBegin;
4960   if (scall == MAT_INITIAL_MATRIX) {
4961     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4962     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4963     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4964   }
4965   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4966   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4967   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4968   PetscFunctionReturn(0);
4969 }
4970 
4971 /*MC
4972    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4973 
4974    Options Database Keys:
4975 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4976 
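  A minimal creation sketch (m, n, M, N and the preallocation arguments are assumed to be provided by the caller):
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
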
4977   Level: beginner
4978 
4979 .seealso: MatCreateAIJ()
4980 M*/
4981 
4982 #undef __FUNCT__
4983 #define __FUNCT__ "MatCreate_MPIAIJ"
4984 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4985 {
4986   Mat_MPIAIJ     *b;
4987   PetscErrorCode ierr;
4988   PetscMPIInt    size;
4989 
4990   PetscFunctionBegin;
4991   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4992 
4993   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4994   B->data       = (void*)b;
4995   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4996   B->assembled  = PETSC_FALSE;
4997   B->insertmode = NOT_SET_VALUES;
4998   b->size       = size;
4999 
5000   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5001 
5002   /* build cache for off array entries formed */
5003   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5004 
5005   b->donotstash  = PETSC_FALSE;
5006   b->colmap      = 0;
5007   b->garray      = 0;
5008   b->roworiented = PETSC_TRUE;
5009 
5010   /* stuff used for matrix vector multiply */
5011   b->lvec  = NULL;
5012   b->Mvctx = NULL;
5013 
5014   /* stuff for MatGetRow() */
5015   b->rowindices   = 0;
5016   b->rowvalues    = 0;
5017   b->getrowactive = PETSC_FALSE;
5018 
5019   /* flexible pointer used in CUSP/CUSPARSE classes */
5020   b->spptr = NULL;
5021 
5022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5032 #if defined(PETSC_HAVE_ELEMENTAL)
5033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5034 #endif
5035 #if defined(PETSC_HAVE_HYPRE)
5036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5037 #endif
5038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5042   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5043   PetscFunctionReturn(0);
5044 }
5045 
5046 #undef __FUNCT__
5047 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5048 /*@C
5049      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5050          and "off-diagonal" part of the matrix in CSR format.
5051 
5052    Collective on MPI_Comm
5053 
5054    Input Parameters:
5055 +  comm - MPI communicator
5056 .  m - number of local rows (Cannot be PETSC_DECIDE)
5057 .  n - This value should be the same as the local size used in creating the
5058        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5059        calculated if N is given). For square matrices n is almost always m.
5060 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5061 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5062 .   i - row indices for "diagonal" portion of matrix
5063 .   j - column indices
5064 .   a - matrix values
5065 .   oi - row indices for "off-diagonal" portion of matrix
5066 .   oj - column indices
5067 -   oa - matrix values
5068 
5069    Output Parameter:
5070 .   mat - the matrix
5071 
5072    Level: advanced
5073 
5074    Notes:
5075        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5076        must free the arrays once the matrix has been destroyed and not before.
5077 
5078        The i and j indices are 0 based
5079 
5080        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5081 
5082        This sets local rows and cannot be used to set off-processor values.
5083 
5084        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5085        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5086        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5087        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5088        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5089        communication if it is known that only local entries will be set.
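
       A minimal call sketch (the i,j,a and oi,oj,oa arrays are assumed to have been built by the application in the split format described above):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve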
5090 
5091 .keywords: matrix, aij, compressed row, sparse, parallel
5092 
5093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5094           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5095 @*/
5096 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5097 {
5098   PetscErrorCode ierr;
5099   Mat_MPIAIJ     *maij;
5100 
5101   PetscFunctionBegin;
5102   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5103   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5104   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5105   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5106   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5107   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5108   maij = (Mat_MPIAIJ*) (*mat)->data;
5109 
5110   (*mat)->preallocated = PETSC_TRUE;
5111 
5112   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5113   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5114 
5115   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5116   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5117 
5118   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5119   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5120   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5121   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5122 
5123   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5124   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
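  /* New nonzero locations would require reallocating the user-provided (uncopied) arrays,
     so flag any attempt to create one as an error */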
5125   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5126   PetscFunctionReturn(0);
5127 }
5128 
5129 /*
5130     Special version of MatSetValues() for MPIAIJ matrices, intended for direct calls from Fortran
5131 */
5132 #include <petsc/private/fortranimpl.h>
5133 
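/* Pick the symbol name that matches the Fortran compiler's name-mangling convention
   (all uppercase, or lowercase with or without a trailing underscore) */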
5134 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5135 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5136 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5137 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5138 #endif
5139 
5140 /* Redefine these macros so they can be used in a void function: errors abort via CHKERRABORT() instead of returning an error code */
5141 #undef CHKERRQ
5142 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5143 #undef SETERRQ2
5144 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5145 #undef SETERRQ3
5146 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5147 #undef SETERRQ
5148 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5149 
5150 #undef __FUNCT__
5151 #define __FUNCT__ "matsetvaluesmpiaij_"
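/* Mirrors MatSetValues_MPIAIJ(), but the scalar arguments arrive by reference (Fortran calling
   convention) and the routine returns void, so errors abort through the macros redefined above */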
5152 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5153 {
5154   Mat            mat  = *mmat;
5155   PetscInt       m    = *mm, n = *mn;
5156   InsertMode     addv = *maddv;
5157   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5158   PetscScalar    value;
5159   PetscErrorCode ierr;
5160 
5161   MatCheckPreallocated(mat,1);
5162   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5163 
5164 #if defined(PETSC_USE_DEBUG)
5165   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5166 #endif
5167   {
5168     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5169     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5170     PetscBool roworiented = aij->roworiented;
5171 
5172     /* Local copies of the Mat_SeqAIJ fields used by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5173     Mat        A                 = aij->A;
5174     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5175     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5176     MatScalar  *aa               = a->a;
5177     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5178     Mat        B                 = aij->B;
5179     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5180     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5181     MatScalar  *ba               = b->a;
5182 
5183     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5184     PetscInt  nonew = a->nonew;
5185     MatScalar *ap1,*ap2;
5186 
5187     PetscFunctionBegin;
5188     for (i=0; i<m; i++) {
5189       if (im[i] < 0) continue;
5190 #if defined(PETSC_USE_DEBUG)
5191       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5192 #endif
5193       if (im[i] >= rstart && im[i] < rend) {
5194         row      = im[i] - rstart;
5195         lastcol1 = -1;
5196         rp1      = aj + ai[row];
5197         ap1      = aa + ai[row];
5198         rmax1    = aimax[row];
5199         nrow1    = ailen[row];
5200         low1     = 0;
5201         high1    = nrow1;
5202         lastcol2 = -1;
5203         rp2      = bj + bi[row];
5204         ap2      = ba + bi[row];
5205         rmax2    = bimax[row];
5206         nrow2    = bilen[row];
5207         low2     = 0;
5208         high2    = nrow2;
5209 
5210         for (j=0; j<n; j++) {
5211           if (roworiented) value = v[i*n+j];
5212           else value = v[i+j*m];
5213           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5214           if (in[j] >= cstart && in[j] < cend) {
5215             col = in[j] - cstart;
5216             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5217           } else if (in[j] < 0) continue;
5218 #if defined(PETSC_USE_DEBUG)
5219           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5220 #endif
5221           else {
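            /* The column lies in the off-diagonal block.  If the matrix was assembled before, B stores
               compacted column indices, so translate the global column through colmap; a negative result
               means a previously unseen off-process column, which requires MatDisAssemble_MPIAIJ() to
               expand B back to global column indices before the value can be inserted */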
5222             if (mat->was_assembled) {
5223               if (!aij->colmap) {
5224                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5225               }
5226 #if defined(PETSC_USE_CTABLE)
5227               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5228               col--;
5229 #else
5230               col = aij->colmap[in[j]] - 1;
5231 #endif
5232               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5233                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5234                 col  =  in[j];
5235                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5236                 B     = aij->B;
5237                 b     = (Mat_SeqAIJ*)B->data;
5238                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5239                 ba    = b->a;   /* refresh ba before it is used below: MatDisAssemble_MPIAIJ() replaced B and its value array */
5240                 rp2   = bj + bi[row];
5241                 ap2   = ba + bi[row];
5242                 rmax2 = bimax[row];
5243                 nrow2 = bilen[row];
5244                 low2  = 0;
5245                 high2 = nrow2;
5246                 bm    = aij->B->rmap->n;
5247               }
5248             } else col = in[j];
5249             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5250           }
5251         }
5252       } else if (!aij->donotstash) {
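        /* The row is owned by another process: stash the values locally; they are communicated to the
           owning process during MatAssemblyBegin()/MatAssemblyEnd() */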
5253         if (roworiented) {
5254           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5255         } else {
5256           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5257         }
5258       }
5259     }
5260   }
5261   PetscFunctionReturnVoid();
5262 }
5263 
5264