xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3dad0653c8b7d79802f38d512f3aec944c073f92)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
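/*
   A minimal sketch of the recommendation above: create one AIJ matrix and call both
   preallocation routines so the same code runs on any communicator size. The
   communicator comm, the local size n, and the per-row estimates (5 nonzeros in the
   diagonal block, 2 in the off-diagonal block) are assumed here for illustration only.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/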
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
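/*
   Sketch of how the routine above is reached through the public interface, assuming A
   is an assembled MATMPIAIJ matrix; keptrows comes back NULL when every row contains
   a nonzero (the early return above).

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/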
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
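/*
   Usage sketch for the routine above: the caller supplies a norms array whose length is
   the global number of columns, on every process (the result is redundant across the
   communicator). A is an assumed assembled parallel matrix.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/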
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the diagonal and off-diagonal entry counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the diagonal and off-diagonal entry counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
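/*
   Sketch of the calling pattern, under these assumptions: gseq is a square SeqAIJ
   matrix whose entries are read only on process 0 (it must still be a valid Mat on the
   other processes, since MatGetBlockSizes() is called collectively above), and the m
   values summed over all processes equal the global size. The second call only moves
   updated numerical values from process 0 into the existing parallel matrix.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/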
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 stores an order-N integer array) but is fast to access.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
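/*
   How the map built above is queried later in this file (see MatSetValues_MPIAIJ()):
   both the key and the stored value are shifted by one so that 0 can mean "absent".
   gcol is an assumed global column number, lcol the resulting local one.

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/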
426 
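/*
   The two macros below insert a single value into the sorted compressed row of the
   diagonal (A) or off-diagonal (B) block: a short binary search narrows the window,
   a linear scan locates the column, and if the entry is absent the tail of the row is
   shifted up by one slot (reallocating via MatSeqXAIJReallocateAIJ() when the row is full).
*/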
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
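/*
   The standard calling sequence that exercises both branches above: locally owned rows
   go straight into the A/B blocks, off-process rows into the stash and are routed at
   assembly time. i, j, and v are assumed global indices and a value.

     ierr = MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/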
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so, we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
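/*
   Usage sketch: any process may list any global row; diag is placed on the zeroed
   diagonal and, when x and b are given (they may be NULL), b is fixed up so the
   solution keeps the values in x at those rows. nr and rows[] are assumed inputs.

     ierr = MatZeroRows(A,nr,rows,1.0,x,b);CHKERRQ(ierr);
*/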
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
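  /* overlap communication with computation: start gathering the ghost values of xx into
     a->lvec, multiply by the local diagonal block while the messages are in flight, then
     finish the scatter and fold in the off-diagonal block applied to the ghost values */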
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the VecScatterEnd() below */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
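/*
   Reached through MatIsTranspose(); a sketch assuming Amat and Bmat are assembled
   MATMPIAIJ matrices and tol is a comparison tolerance.

     PetscBool flg;
     ierr = MatIsTranspose(Amat,Bmat,tol,&flg);CHKERRQ(ierr);
*/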
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1154 #if defined(PETSC_HAVE_ELEMENTAL)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1160 #endif
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1166 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1167 {
1168   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1169   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1170   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1171   PetscErrorCode ierr;
1172   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1173   int            fd;
1174   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1175   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1176   PetscScalar    *column_values;
1177   PetscInt       message_count,flowcontrolcount;
1178   FILE           *file;
1179 
1180   PetscFunctionBegin;
1181   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1182   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1183   nz   = A->nz + B->nz;
1184   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1185   if (!rank) {
1186     header[0] = MAT_FILE_CLASSID;
1187     header[1] = mat->rmap->N;
1188     header[2] = mat->cmap->N;
1189 
1190     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1192     /* get largest number of rows any processor has */
1193     rlen  = mat->rmap->n;
1194     range = mat->rmap->range;
1195     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1196   } else {
1197     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1198     rlen = mat->rmap->n;
1199   }
1200 
1201   /* load up the local row counts */
1202   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1203   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1204 
1205   /* store the row lengths to the file */
1206   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1207   if (!rank) {
1208     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1209     for (i=1; i<size; i++) {
1210       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1211       rlen = range[i+1] - range[i];
1212       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1213       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1214     }
1215     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1216   } else {
1217     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1218     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1220   }
1221   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1222 
1223   /* load up the local column indices */
1224   nzmax = nz; /* process 0 needs as much space as the largest process needs */
1225   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1227   cnt   = 0;
1228   for (i=0; i<mat->rmap->n; i++) {
1229     for (j=B->i[i]; j<B->i[i+1]; j++) {
1230       if ((col = garray[B->j[j]]) > cstart) break;
1231       column_indices[cnt++] = col;
1232     }
1233     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1234     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1235   }
1236   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1237 
1238   /* store the column indices to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     MPI_Status status;
1242     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     for (i=1; i<size; i++) {
1244       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1245       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1246       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1247       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1249     }
1250     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1251   } else {
1252     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1253     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1255     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1256   }
1257   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1258 
1259   /* load up the local column values */
1260   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1261   cnt  = 0;
1262   for (i=0; i<mat->rmap->n; i++) {
1263     for (j=B->i[i]; j<B->i[i+1]; j++) {
1264       if (garray[B->j[j]] > cstart) break;
1265       column_values[cnt++] = B->a[j];
1266     }
1267     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1268     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1269   }
1270   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1271 
1272   /* store the column values to the file */
1273   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1274   if (!rank) {
1275     MPI_Status status;
1276     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1277     for (i=1; i<size; i++) {
1278       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1279       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1280       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1281       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1283     }
1284     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1285   } else {
1286     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1287     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1290   }
1291   ierr = PetscFree(column_values);CHKERRQ(ierr);
1292 
1293   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1294   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #include <petscdraw.h>
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1301 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1302 {
1303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1304   PetscErrorCode    ierr;
1305   PetscMPIInt       rank = aij->rank,size = aij->size;
1306   PetscBool         isdraw,iascii,isbinary;
1307   PetscViewer       sviewer;
1308   PetscViewerFormat format;
1309 
1310   PetscFunctionBegin;
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1313   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1314   if (iascii) {
1315     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1316     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1317       MatInfo   info;
1318       PetscBool inodes;
1319 
1320       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1321       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1322       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1323       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1324       if (!inodes) {
1325         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1326                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1327       } else {
1328         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1329                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1330       }
1331       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1332       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1333       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1335       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1337       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1338       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1339       PetscFunctionReturn(0);
1340     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1341       PetscInt inodecount,inodelimit,*inodes;
1342       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1343       if (inodes) {
1344         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1345       } else {
1346         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1347       }
1348       PetscFunctionReturn(0);
1349     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1350       PetscFunctionReturn(0);
1351     }
1352   } else if (isbinary) {
1353     if (size == 1) {
1354       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1355       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1356     } else {
1357       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1358     }
1359     PetscFunctionReturn(0);
1360   } else if (isdraw) {
1361     PetscDraw draw;
1362     PetscBool isnull;
1363     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1364     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1365     if (isnull) PetscFunctionReturn(0);
1366   }
1367 
1368   {
1369     /* assemble the entire matrix onto the first process */
1370     Mat        A;
1371     Mat_SeqAIJ *Aloc;
1372     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1373     MatScalar  *a;
1374 
1375     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1376     if (!rank) {
1377       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1378     } else {
1379       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1380     }
1381     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1382     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1383     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1384     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1385     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1386 
1387     /* copy over the A part */
1388     Aloc = (Mat_SeqAIJ*)aij->A->data;
1389     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1390     row  = mat->rmap->rstart;
1391     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1392     for (i=0; i<m; i++) {
1393       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1394       row++;
1395       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1396     }
1397     aj = Aloc->j;
1398     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1399 
1400     /* copy over the B part */
1401     Aloc = (Mat_SeqAIJ*)aij->B->data;
1402     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1403     row  = mat->rmap->rstart;
1404     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1405     ct   = cols;
1406     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1407     for (i=0; i<m; i++) {
1408       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1409       row++;
1410       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1411     }
1412     ierr = PetscFree(ct);CHKERRQ(ierr);
1413     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415     /*
1416        Every process has to make this call, since the graphics waits are
1417        synchronized across all processes that share the PetscDraw object
1418     */
1419     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1420     if (!rank) {
1421       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1422       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1423     }
1424     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1425     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1426     ierr = MatDestroy(&A);CHKERRQ(ierr);
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatView_MPIAIJ"
1433 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1434 {
1435   PetscErrorCode ierr;
1436   PetscBool      iascii,isdraw,issocket,isbinary;
1437 
1438   PetscFunctionBegin;
1439   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1440   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1443   if (iascii || isdraw || isbinary || issocket) {
1444     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1445   }
1446   PetscFunctionReturn(0);
1447 }
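/*
   A minimal usage sketch (user code, not part of this file); "mat" is an assembled
   MATMPIAIJ matrix and "viewer" a PetscViewer, both assumed declared by the caller:

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)mat),"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The first call takes the ASCII path above; the second dispatches to MatView_MPIAIJ_Binary().
*/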
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatSOR_MPIAIJ"
1451 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1452 {
1453   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1454   PetscErrorCode ierr;
1455   Vec            bb1 = 0;
1456   PetscBool      hasop;
1457 
1458   PetscFunctionBegin;
1459   if (flag == SOR_APPLY_UPPER) {
1460     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461     PetscFunctionReturn(0);
1462   }
1463 
1464   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1465     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1466   }
1467 
1468   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1469     if (flag & SOR_ZERO_INITIAL_GUESS) {
1470       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471       its--;
1472     }
1473 
1474     while (its--) {
1475       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477 
1478       /* update rhs: bb1 = bb - B*x */
1479       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1480       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1481 
1482       /* local sweep */
1483       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1484     }
1485   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1486     if (flag & SOR_ZERO_INITIAL_GUESS) {
1487       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488       its--;
1489     }
1490     while (its--) {
1491       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493 
1494       /* update rhs: bb1 = bb - B*x */
1495       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1497 
1498       /* local sweep */
1499       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1500     }
1501   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1502     if (flag & SOR_ZERO_INITIAL_GUESS) {
1503       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1504       its--;
1505     }
1506     while (its--) {
1507       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509 
1510       /* update rhs: bb1 = bb - B*x */
1511       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1512       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1513 
1514       /* local sweep */
1515       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1516     }
1517   } else if (flag & SOR_EISENSTAT) {
1518     Vec xx1;
1519 
1520     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1521     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1522 
1523     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525     if (!mat->diag) {
1526       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1527       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1528     }
1529     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1530     if (hasop) {
1531       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1532     } else {
1533       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1534     }
1535     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1536 
1537     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1538 
1539     /* local sweep */
1540     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1541     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1542     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1543   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1544 
1545   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1546 
1547   matin->factorerrortype = mat->A->factorerrortype;
1548   PetscFunctionReturn(0);
1549 }
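/*
   A minimal usage sketch (user code, not part of this file): for a MATMPIAIJ matrix the
   routine above is reached through the local sweep variants, e.g.

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   runs two block Jacobi iterations with one symmetric local sweep each; the off-process
   coupling B*x is folded into the right-hand side between iterations.
*/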
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatPermute_MPIAIJ"
1553 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1554 {
1555   Mat            aA,aB,Aperm;
1556   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1557   PetscScalar    *aa,*ba;
1558   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1559   PetscSF        rowsf,sf;
1560   IS             parcolp = NULL;
1561   PetscBool      done;
1562   PetscErrorCode ierr;
1563 
1564   PetscFunctionBegin;
1565   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1566   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1567   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1568   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1569 
1570   /* Invert row permutation to find out where my rows should go */
1571   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1572   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1573   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1574   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1575   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1576   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1577 
1578   /* Invert column permutation to find out where my columns should go */
1579   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1580   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1581   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1582   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1583   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1585   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1586 
1587   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1588   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1589   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1590 
1591   /* Find out where my gcols should go */
1592   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1593   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1594   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1595   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1596   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1597   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1598   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1599   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1600 
1601   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1602   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1603   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1604   for (i=0; i<m; i++) {
1605     PetscInt row = rdest[i],rowner;
1606     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1607     for (j=ai[i]; j<ai[i+1]; j++) {
1608       PetscInt cowner,col = cdest[aj[j]];
1609       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1610       if (rowner == cowner) dnnz[i]++;
1611       else onnz[i]++;
1612     }
1613     for (j=bi[i]; j<bi[i+1]; j++) {
1614       PetscInt cowner,col = gcdest[bj[j]];
1615       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1616       if (rowner == cowner) dnnz[i]++;
1617       else onnz[i]++;
1618     }
1619   }
1620   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1621   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1622   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1623   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1624   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1625 
1626   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1627   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1628   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1629   for (i=0; i<m; i++) {
1630     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1631     PetscInt j0,rowlen;
1632     rowlen = ai[i+1] - ai[i];
1633     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the number of rows m (the scratch array length), so insert in batches */
1634       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1635       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1636     }
1637     rowlen = bi[i+1] - bi[i];
1638     for (j0=j=0; j<rowlen; j0=j) {
1639       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1640       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1641     }
1642   }
1643   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1645   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1646   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1647   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1648   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1649   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1650   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1651   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1652   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1653   *B = Aperm;
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1659 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1660 {
1661   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1662   PetscErrorCode ierr;
1663 
1664   PetscFunctionBegin;
1665   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1666   if (ghosts) *ghosts = aij->garray;
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1672 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1673 {
1674   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1675   Mat            A    = mat->A,B = mat->B;
1676   PetscErrorCode ierr;
1677   PetscReal      isend[5],irecv[5];
1678 
1679   PetscFunctionBegin;
1680   info->block_size = 1.0;
1681   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1682 
1683   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1684   isend[3] = info->memory;  isend[4] = info->mallocs;
1685 
1686   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1687 
1688   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1689   isend[3] += info->memory;  isend[4] += info->mallocs;
1690   if (flag == MAT_LOCAL) {
1691     info->nz_used      = isend[0];
1692     info->nz_allocated = isend[1];
1693     info->nz_unneeded  = isend[2];
1694     info->memory       = isend[3];
1695     info->mallocs      = isend[4];
1696   } else if (flag == MAT_GLOBAL_MAX) {
1697     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1698 
1699     info->nz_used      = irecv[0];
1700     info->nz_allocated = irecv[1];
1701     info->nz_unneeded  = irecv[2];
1702     info->memory       = irecv[3];
1703     info->mallocs      = irecv[4];
1704   } else if (flag == MAT_GLOBAL_SUM) {
1705     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1706 
1707     info->nz_used      = irecv[0];
1708     info->nz_allocated = irecv[1];
1709     info->nz_unneeded  = irecv[2];
1710     info->memory       = irecv[3];
1711     info->mallocs      = irecv[4];
1712   }
1713   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1714   info->fill_ratio_needed = 0;
1715   info->factor_mallocs    = 0;
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatSetOption_MPIAIJ"
1721 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1722 {
1723   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1724   PetscErrorCode ierr;
1725 
1726   PetscFunctionBegin;
1727   switch (op) {
1728   case MAT_NEW_NONZERO_LOCATIONS:
1729   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1730   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1731   case MAT_KEEP_NONZERO_PATTERN:
1732   case MAT_NEW_NONZERO_LOCATION_ERR:
1733   case MAT_USE_INODES:
1734   case MAT_IGNORE_ZERO_ENTRIES:
1735     MatCheckPreallocated(A,1);
1736     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1737     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1738     break;
1739   case MAT_ROW_ORIENTED:
1740     MatCheckPreallocated(A,1);
1741     a->roworiented = flg;
1742 
1743     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1744     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1745     break;
1746   case MAT_NEW_DIAGONALS:
1747     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1748     break;
1749   case MAT_IGNORE_OFF_PROC_ENTRIES:
1750     a->donotstash = flg;
1751     break;
1752   case MAT_SPD:
1753     A->spd_set = PETSC_TRUE;
1754     A->spd     = flg;
1755     if (flg) {
1756       A->symmetric                  = PETSC_TRUE;
1757       A->structurally_symmetric     = PETSC_TRUE;
1758       A->symmetric_set              = PETSC_TRUE;
1759       A->structurally_symmetric_set = PETSC_TRUE;
1760     }
1761     break;
1762   case MAT_SYMMETRIC:
1763     MatCheckPreallocated(A,1);
1764     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_STRUCTURALLY_SYMMETRIC:
1767     MatCheckPreallocated(A,1);
1768     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_HERMITIAN:
1771     MatCheckPreallocated(A,1);
1772     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1773     break;
1774   case MAT_SYMMETRY_ETERNAL:
1775     MatCheckPreallocated(A,1);
1776     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1777     break;
1778   case MAT_SUBMAT_SINGLEIS:
1779     A->submat_singleis = flg;
1780     break;
1781   default:
1782     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatGetRow_MPIAIJ"
1789 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1790 {
1791   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1792   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1793   PetscErrorCode ierr;
1794   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1795   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1796   PetscInt       *cmap,*idx_p;
1797 
1798   PetscFunctionBegin;
1799   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1800   mat->getrowactive = PETSC_TRUE;
1801 
1802   if (!mat->rowvalues && (idx || v)) {
1803     /*
1804         allocate enough space to hold information from the longest row.
1805     */
1806     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1807     PetscInt   max = 1,tmp;
1808     for (i=0; i<matin->rmap->n; i++) {
1809       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1810       if (max < tmp) max = tmp;
1811     }
1812     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1813   }
1814 
1815   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1816   lrow = row - rstart;
1817 
1818   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1819   if (!v)   {pvA = 0; pvB = 0;}
1820   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1821   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1822   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1823   nztot = nzA + nzB;
1824 
1825   cmap = mat->garray;
1826   if (v  || idx) {
1827     if (nztot) {
1828       /* Sort by increasing column numbers, assuming A and B already sorted */
1829       PetscInt imark = -1;
1830       if (v) {
1831         *v = v_p = mat->rowvalues;
1832         for (i=0; i<nzB; i++) {
1833           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1834           else break;
1835         }
1836         imark = i;
1837         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1838         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1839       }
1840       if (idx) {
1841         *idx = idx_p = mat->rowindices;
1842         if (imark > -1) {
1843           for (i=0; i<imark; i++) {
1844             idx_p[i] = cmap[cworkB[i]];
1845           }
1846         } else {
1847           for (i=0; i<nzB; i++) {
1848             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1849             else break;
1850           }
1851           imark = i;
1852         }
1853         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1854         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1855       }
1856     } else {
1857       if (idx) *idx = 0;
1858       if (v)   *v   = 0;
1859     }
1860   }
1861   *nz  = nztot;
1862   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1863   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1869 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1870 {
1871   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1872 
1873   PetscFunctionBegin;
1874   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1875   aij->getrowactive = PETSC_FALSE;
1876   PetscFunctionReturn(0);
1877 }
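/*
   A minimal usage sketch (user code, not part of this file): MatGetRow() only serves
   locally owned rows and must be paired with MatRestoreRow() before another row is
   requested:

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;

     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/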
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatNorm_MPIAIJ"
1881 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1882 {
1883   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1884   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1885   PetscErrorCode ierr;
1886   PetscInt       i,j,cstart = mat->cmap->rstart;
1887   PetscReal      sum = 0.0;
1888   MatScalar      *v;
1889 
1890   PetscFunctionBegin;
1891   if (aij->size == 1) {
1892     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1893   } else {
1894     if (type == NORM_FROBENIUS) {
1895       v = amat->a;
1896       for (i=0; i<amat->nz; i++) {
1897         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1898       }
1899       v = bmat->a;
1900       for (i=0; i<bmat->nz; i++) {
1901         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1902       }
1903       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1904       *norm = PetscSqrtReal(*norm);
1905       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1906     } else if (type == NORM_1) { /* max column norm */
1907       PetscReal *tmp,*tmp2;
1908       PetscInt  *jj,*garray = aij->garray;
1909       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1910       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1911       *norm = 0.0;
1912       v     = amat->a; jj = amat->j;
1913       for (j=0; j<amat->nz; j++) {
1914         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1915       }
1916       v = bmat->a; jj = bmat->j;
1917       for (j=0; j<bmat->nz; j++) {
1918         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1919       }
1920       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       for (j=0; j<mat->cmap->N; j++) {
1922         if (tmp2[j] > *norm) *norm = tmp2[j];
1923       }
1924       ierr = PetscFree(tmp);CHKERRQ(ierr);
1925       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1926       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1927     } else if (type == NORM_INFINITY) { /* max row norm */
1928       PetscReal ntemp = 0.0;
1929       for (j=0; j<aij->A->rmap->n; j++) {
1930         v   = amat->a + amat->i[j];
1931         sum = 0.0;
1932         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1933           sum += PetscAbsScalar(*v); v++;
1934         }
1935         v = bmat->a + bmat->i[j];
1936         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1937           sum += PetscAbsScalar(*v); v++;
1938         }
1939         if (sum > ntemp) ntemp = sum;
1940       }
1941       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1942       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1943     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatTranspose_MPIAIJ"
1950 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1951 {
1952   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1953   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1954   PetscErrorCode ierr;
1955   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1956   PetscInt       cstart = A->cmap->rstart,ncol;
1957   Mat            B;
1958   MatScalar      *array;
1959 
1960   PetscFunctionBegin;
1961   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1962 
1963   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1964   ai = Aloc->i; aj = Aloc->j;
1965   bi = Bloc->i; bj = Bloc->j;
1966   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1967     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1968     PetscSFNode          *oloc;
1969     PETSC_UNUSED PetscSF sf;
1970 
1971     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1972     /* compute d_nnz for preallocation */
1973     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1974     for (i=0; i<ai[ma]; i++) {
1975       d_nnz[aj[i]]++;
1976       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1977     }
1978     /* compute local off-diagonal contributions */
1979     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1980     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1981     /* map those to global */
1982     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1983     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1984     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1985     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1986     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1987     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1988     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1989 
1990     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1991     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1992     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1993     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1994     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1995     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1996   } else {
1997     B    = *matout;
1998     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1999     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2000   }
2001 
2002   /* copy over the A part */
2003   array = Aloc->a;
2004   row   = A->rmap->rstart;
2005   for (i=0; i<ma; i++) {
2006     ncol = ai[i+1]-ai[i];
2007     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2008     row++;
2009     array += ncol; aj += ncol;
2010   }
2011   aj = Aloc->j;
2012   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2013 
2014   /* copy over the B part */
2015   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2016   array = Bloc->a;
2017   row   = A->rmap->rstart;
2018   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2019   cols_tmp = cols;
2020   for (i=0; i<mb; i++) {
2021     ncol = bi[i+1]-bi[i];
2022     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2023     row++;
2024     array += ncol; cols_tmp += ncol;
2025   }
2026   ierr = PetscFree(cols);CHKERRQ(ierr);
2027 
2028   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2030   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2031     *matout = B;
2032   } else {
2033     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2034   }
2035   PetscFunctionReturn(0);
2036 }
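/*
   A minimal usage sketch (user code, not part of this file; "At" is a Mat declared by
   the caller):

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   allocates At with the transposed layout via the preallocation pass above; calling with
   MAT_REUSE_MATRIX and *matout == A (square matrices only) takes the header-merge branch.
*/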
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2040 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2041 {
2042   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2043   Mat            a    = aij->A,b = aij->B;
2044   PetscErrorCode ierr;
2045   PetscInt       s1,s2,s3;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2049   if (rr) {
2050     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2051     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2052     /* Overlap communication with computation. */
2053     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2054   }
2055   if (ll) {
2056     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2057     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2058     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2059   }
2060   /* scale the diagonal block */
2061   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2062 
2063   if (rr) {
2064     /* Do a scatter end and then right scale the off-diagonal block */
2065     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2066     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2073 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2074 {
2075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2076   PetscErrorCode ierr;
2077 
2078   PetscFunctionBegin;
2079   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatEqual_MPIAIJ"
2085 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2086 {
2087   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2088   Mat            a,b,c,d;
2089   PetscBool      flg;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   a = matA->A; b = matA->B;
2094   c = matB->A; d = matB->B;
2095 
2096   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2097   if (flg) {
2098     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2099   }
2100   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatCopy_MPIAIJ"
2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2107 {
2108   PetscErrorCode ierr;
2109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2110   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2111 
2112   PetscFunctionBegin;
2113   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2114   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2115     /* Because of the column compression in the off-process part of the matrix a->B,
2116        the number of columns in a->B and b->B may differ, hence we cannot call
2117        MatCopy() directly on the two parts. If need be, a copy more efficient than
2118        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2119        and then copying the submatrices. */
2120     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2121   } else {
2122     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2123     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2124   }
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatSetUp_MPIAIJ"
2130 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2131 {
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 /*
2140    Computes the number of nonzeros per row needed for preallocation when X and Y
2141    have different nonzero structure.
2142 */
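/*
   Worked example (hypothetical row): if X has global columns {0,3,5} and Y has {3,4},
   the merged pattern is {0,3,4,5}, so nnz = 4. Y columns smaller than the current X
   column are counted by the catch-up loop, the shared column 3 is counted once (the
   k++ skip), and any Y columns beyond the last X column are counted by the final loop.
*/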
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2146 {
2147   PetscInt       i,j,k,nzx,nzy;
2148 
2149   PetscFunctionBegin;
2150   /* Set the number of nonzeros in the new matrix */
2151   for (i=0; i<m; i++) {
2152     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2153     nzx = xi[i+1] - xi[i];
2154     nzy = yi[i+1] - yi[i];
2155     nnz[i] = 0;
2156     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2157       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2158       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2159       nnz[i]++;
2160     }
2161     for (; k<nzy; k++) nnz[i]++;
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2169 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2170 {
2171   PetscErrorCode ierr;
2172   PetscInt       m = Y->rmap->N;
2173   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2174   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPY_MPIAIJ"
2183 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2184 {
2185   PetscErrorCode ierr;
2186   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2187   PetscBLASInt   bnz,one=1;
2188   Mat_SeqAIJ     *x,*y;
2189 
2190   PetscFunctionBegin;
2191   if (str == SAME_NONZERO_PATTERN) {
2192     PetscScalar alpha = a;
2193     x    = (Mat_SeqAIJ*)xx->A->data;
2194     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2195     y    = (Mat_SeqAIJ*)yy->A->data;
2196     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2197     x    = (Mat_SeqAIJ*)xx->B->data;
2198     y    = (Mat_SeqAIJ*)yy->B->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2201     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2202   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2203     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2204   } else {
2205     Mat      B;
2206     PetscInt *nnz_d,*nnz_o;
2207     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2208     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2209     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2210     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2211     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2212     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2213     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2214     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2215     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2216     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2217     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2218     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2219     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2220     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2221   }
2222   PetscFunctionReturn(0);
2223 }
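/*
   A minimal usage sketch (user code, not part of this file): computing Y <- a*X + Y for
   X and Y with identical parallel layouts,

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   lands in the final branch above, which preallocates the merged pattern before adding.
*/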
2224 
2225 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatConjugate_MPIAIJ"
2229 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2230 {
2231 #if defined(PETSC_USE_COMPLEX)
2232   PetscErrorCode ierr;
2233   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2237   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2238 #else
2239   PetscFunctionBegin;
2240 #endif
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatRealPart_MPIAIJ"
2246 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2247 {
2248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2249   PetscErrorCode ierr;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2253   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2259 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2260 {
2261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2266   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2272 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2273 {
2274   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2275   PetscErrorCode ierr;
2276   PetscInt       i,*idxb = 0;
2277   PetscScalar    *va,*vb;
2278   Vec            vtmp;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2282   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2283   if (idx) {
2284     for (i=0; i<A->rmap->n; i++) {
2285       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2286     }
2287   }
2288 
2289   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2290   if (idx) {
2291     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2292   }
2293   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2294   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2295 
2296   for (i=0; i<A->rmap->n; i++) {
2297     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2298       va[i] = vb[i];
2299       if (idx) idx[i] = a->garray[idxb[i]];
2300     }
2301   }
2302 
2303   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2304   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2305   ierr = PetscFree(idxb);CHKERRQ(ierr);
2306   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2312 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2313 {
2314   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2315   PetscErrorCode ierr;
2316   PetscInt       i,*idxb = 0;
2317   PetscScalar    *va,*vb;
2318   Vec            vtmp;
2319 
2320   PetscFunctionBegin;
2321   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2322   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2323   if (idx) {
2324     for (i=0; i<A->rmap->n; i++) {
2325       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2326     }
2327   }
2328 
2329   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2330   if (idx) {
2331     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2332   }
2333   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2334   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2335 
2336   for (i=0; i<A->rmap->n; i++) {
2337     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2338       va[i] = vb[i];
2339       if (idx) idx[i] = a->garray[idxb[i]];
2340     }
2341   }
2342 
2343   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2344   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2345   ierr = PetscFree(idxb);CHKERRQ(ierr);
2346   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2352 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2353 {
2354   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2355   PetscInt       n      = A->rmap->n;
2356   PetscInt       cstart = A->cmap->rstart;
2357   PetscInt       *cmap  = mat->garray;
2358   PetscInt       *diagIdx, *offdiagIdx;
2359   Vec            diagV, offdiagV;
2360   PetscScalar    *a, *diagA, *offdiagA;
2361   PetscInt       r;
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2366   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2367   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2368   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2369   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2370   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2371   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2372   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2373   for (r = 0; r < n; ++r) {
2374     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2375       a[r]   = diagA[r];
2376       idx[r] = cstart + diagIdx[r];
2377     } else {
2378       a[r]   = offdiagA[r];
2379       idx[r] = cmap[offdiagIdx[r]];
2380     }
2381   }
2382   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2383   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2384   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2385   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2386   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2387   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2393 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2396   PetscInt       n      = A->rmap->n;
2397   PetscInt       cstart = A->cmap->rstart;
2398   PetscInt       *cmap  = mat->garray;
2399   PetscInt       *diagIdx, *offdiagIdx;
2400   Vec            diagV, offdiagV;
2401   PetscScalar    *a, *diagA, *offdiagA;
2402   PetscInt       r;
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2407   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2408   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2409   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2410   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2411   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2412   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2413   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2414   for (r = 0; r < n; ++r) {
2415     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2416       a[r]   = diagA[r];
2417       idx[r] = cstart + diagIdx[r];
2418     } else {
2419       a[r]   = offdiagA[r];
2420       idx[r] = cmap[offdiagIdx[r]];
2421     }
2422   }
2423   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2424   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2426   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2427   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2428   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2434 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2435 {
2436   PetscErrorCode ierr;
2437   Mat            *dummy;
2438 
2439   PetscFunctionBegin;
2440   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2441   *newmat = *dummy;
2442   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2448 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2449 {
2450   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2451   PetscErrorCode ierr;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2455   A->factorerrortype = a->A->factorerrortype;
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2461 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2462 {
2463   PetscErrorCode ierr;
2464   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2465 
2466   PetscFunctionBegin;
2467   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2468   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2469   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2470   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2476 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2477 {
2478   PetscFunctionBegin;
2479   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2480   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2486 /*@
2487    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2488 
2489    Collective on Mat
2490 
2491    Input Parameters:
2492 +    A - the matrix
2493 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is not to use it)
2494 
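   Options Database Keys:
. -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap
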
2495    Level: advanced
2496 
2497 @*/
2498 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2499 {
2500   PetscErrorCode       ierr;
2501 
2502   PetscFunctionBegin;
2503   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2509 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2510 {
2511   PetscErrorCode       ierr;
2512   PetscBool            sc = PETSC_FALSE,flg;
2513 
2514   PetscFunctionBegin;
2515   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2516   ierr = PetscObjectOptionsBegin((PetscObject)A);
2517     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2518     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2519     if (flg) {
2520       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2521     }
2522   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatShift_MPIAIJ"
2528 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2529 {
2530   PetscErrorCode ierr;
2531   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2532   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2533 
2534   PetscFunctionBegin;
2535   if (!Y->preallocated) {
2536     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2537   } else if (!aij->nz) {
2538     PetscInt nonew = aij->nonew;
2539     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2540     aij->nonew = nonew;
2541   }
2542   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2548 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2549 {
2550   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2551   PetscErrorCode ierr;
2552 
2553   PetscFunctionBegin;
2554   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2555   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2556   if (d) {
2557     PetscInt rstart;
2558     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2559     *d += rstart;
2560 
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 
2566 /* -------------------------------------------------------------------*/
2567 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2568                                        MatGetRow_MPIAIJ,
2569                                        MatRestoreRow_MPIAIJ,
2570                                        MatMult_MPIAIJ,
2571                                 /* 4*/ MatMultAdd_MPIAIJ,
2572                                        MatMultTranspose_MPIAIJ,
2573                                        MatMultTransposeAdd_MPIAIJ,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                 /*10*/ 0,
2578                                        0,
2579                                        0,
2580                                        MatSOR_MPIAIJ,
2581                                        MatTranspose_MPIAIJ,
2582                                 /*15*/ MatGetInfo_MPIAIJ,
2583                                        MatEqual_MPIAIJ,
2584                                        MatGetDiagonal_MPIAIJ,
2585                                        MatDiagonalScale_MPIAIJ,
2586                                        MatNorm_MPIAIJ,
2587                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2588                                        MatAssemblyEnd_MPIAIJ,
2589                                        MatSetOption_MPIAIJ,
2590                                        MatZeroEntries_MPIAIJ,
2591                                 /*24*/ MatZeroRows_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*29*/ MatSetUp_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                        MatGetDiagonalBlock_MPIAIJ,
2600                                        0,
2601                                 /*34*/ MatDuplicate_MPIAIJ,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                 /*39*/ MatAXPY_MPIAIJ,
2607                                        MatGetSubMatrices_MPIAIJ,
2608                                        MatIncreaseOverlap_MPIAIJ,
2609                                        MatGetValues_MPIAIJ,
2610                                        MatCopy_MPIAIJ,
2611                                 /*44*/ MatGetRowMax_MPIAIJ,
2612                                        MatScale_MPIAIJ,
2613                                        MatShift_MPIAIJ,
2614                                        MatDiagonalSet_MPIAIJ,
2615                                        MatZeroRowsColumns_MPIAIJ,
2616                                 /*49*/ MatSetRandom_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2622                                        0,
2623                                        MatSetUnfactored_MPIAIJ,
2624                                        MatPermute_MPIAIJ,
2625                                        0,
2626                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2627                                        MatDestroy_MPIAIJ,
2628                                        MatView_MPIAIJ,
2629                                        0,
2630                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2631                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2632                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2637                                        MatGetRowMinAbs_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*75*/ MatFDColoringApply_AIJ,
2643                                        MatSetFromOptions_MPIAIJ,
2644                                        0,
2645                                        0,
2646                                        MatFindZeroDiagonals_MPIAIJ,
2647                                 /*80*/ 0,
2648                                        0,
2649                                        0,
2650                                 /*83*/ MatLoad_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2657                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2658                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2659                                        MatPtAP_MPIAIJ_MPIAIJ,
2660                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2661                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                 /*99*/ 0,
2667                                        0,
2668                                        0,
2669                                        MatConjugate_MPIAIJ,
2670                                        0,
2671                                 /*104*/MatSetValuesRow_MPIAIJ,
2672                                        MatRealPart_MPIAIJ,
2673                                        MatImaginaryPart_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                 /*109*/0,
2677                                        0,
2678                                        MatGetRowMin_MPIAIJ,
2679                                        0,
2680                                        MatMissingDiagonal_MPIAIJ,
2681                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2682                                        0,
2683                                        MatGetGhosts_MPIAIJ,
2684                                        0,
2685                                        0,
2686                                 /*119*/0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        MatGetMultiProcBlock_MPIAIJ,
2691                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2692                                        MatGetColumnNorms_MPIAIJ,
2693                                        MatInvertBlockDiagonal_MPIAIJ,
2694                                        0,
2695                                        MatGetSubMatricesMPI_MPIAIJ,
2696                                 /*129*/0,
2697                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2698                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2699                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2700                                        0,
2701                                 /*134*/0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                 /*139*/0,
2707                                        0,
2708                                        0,
2709                                        MatFDColoringSetUp_MPIXAIJ,
2710                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2711                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2712 };
2713 
2714 /* ----------------------------------------------------------------------------------------*/
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2718 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2719 {
2720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2725   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2731 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2732 {
2733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2734   PetscErrorCode ierr;
2735 
2736   PetscFunctionBegin;
2737   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2738   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2744 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2745 {
2746   Mat_MPIAIJ     *b;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2751   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2752   b = (Mat_MPIAIJ*)B->data;
2753 
2754 #if defined(PETSC_USE_CTABLE)
2755   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2756 #else
2757   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2758 #endif
2759   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2760   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2761   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2762 
2763   /* Because B will have been resized we simply destroy it and create a new one each time */
2764   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2765   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2766   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2767   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2768   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2769   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2770 
2771   if (!B->preallocated) {
2772     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2773     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2774     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2775     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2776     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2777   }
2778 
2779   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2780   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2781   B->preallocated  = PETSC_TRUE;
2782   B->was_assembled = PETSC_FALSE;
2783   B->assembled     = PETSC_FALSE;
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2789 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2790 {
2791   Mat            mat;
2792   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2793   PetscErrorCode ierr;
2794 
2795   PetscFunctionBegin;
2796   *newmat = 0;
2797   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2798   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2799   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2800   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2801   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2802   a       = (Mat_MPIAIJ*)mat->data;
2803 
2804   mat->factortype   = matin->factortype;
2805   mat->assembled    = PETSC_TRUE;
2806   mat->insertmode   = NOT_SET_VALUES;
2807   mat->preallocated = PETSC_TRUE;
2808 
2809   a->size         = oldmat->size;
2810   a->rank         = oldmat->rank;
2811   a->donotstash   = oldmat->donotstash;
2812   a->roworiented  = oldmat->roworiented;
2813   a->rowindices   = 0;
2814   a->rowvalues    = 0;
2815   a->getrowactive = PETSC_FALSE;
2816 
2817   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2818   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2819 
2820   if (oldmat->colmap) {
2821 #if defined(PETSC_USE_CTABLE)
2822     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2823 #else
2824     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2825     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2826     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2827 #endif
2828   } else a->colmap = 0;
2829   if (oldmat->garray) {
2830     PetscInt len;
2831     len  = oldmat->B->cmap->n;
2832     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2833     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2834     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2835   } else a->garray = 0;
2836 
2837   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2838   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2839   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2840   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2841   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2842   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2843   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2844   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2845   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2846   *newmat = mat;
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatLoad_MPIAIJ"
2854 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2855 {
2856   PetscScalar    *vals,*svals;
2857   MPI_Comm       comm;
2858   PetscErrorCode ierr;
2859   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2860   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2861   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2862   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2863   PetscInt       cend,cstart,n,*rowners;
2864   int            fd;
2865   PetscInt       bs = newMat->rmap->bs;
2866 
2867   PetscFunctionBegin;
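       /* The binary format read below is: header = {MAT_FILE_CLASSID, M, N, nz}, followed by
          the number of nonzeros in each row, then all column indices, then all numerical values */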
2868   /* force binary viewer to load .info file if it has not yet done so */
2869   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2870   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2871   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2872   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2873   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2874   if (!rank) {
2875     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2876     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2877     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2878   }
2879 
2880   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2881   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2882   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2883   if (bs < 0) bs = 1;
2884 
2885   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2886   M    = header[1]; N = header[2];
2887 
2888   /* If global sizes are set, check if they are consistent with that given in the file */
2889   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2890   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2891 
2892   /* determine ownership of all (block) rows */
2893   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2894   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2895   else m = newMat->rmap->n; /* Set by user */
2896 
2897   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2898   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2899 
2900   /* First process needs enough room for process with most rows */
2901   if (!rank) {
2902     mmax = rowners[1];
2903     for (i=2; i<=size; i++) {
2904       mmax = PetscMax(mmax, rowners[i]);
2905     }
2906   } else mmax = -1;             /* unused, but compilers complain */
2907 
2908   rowners[0] = 0;
2909   for (i=2; i<=size; i++) {
2910     rowners[i] += rowners[i-1];
2911   }
2912   rstart = rowners[rank];
2913   rend   = rowners[rank+1];
2914 
2915   /* distribute row lengths to all processors */
2916   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2917   if (!rank) {
2918     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2919     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2920     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2921     for (j=0; j<m; j++) {
2922       procsnz[0] += ourlens[j];
2923     }
2924     for (i=1; i<size; i++) {
2925       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2926       /* calculate the number of nonzeros on each processor */
2927       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2928         procsnz[i] += rowlengths[j];
2929       }
2930       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2931     }
2932     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2933   } else {
2934     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2935   }
2936 
2937   if (!rank) {
2938     /* determine max buffer needed and allocate it */
2939     maxnz = 0;
2940     for (i=0; i<size; i++) {
2941       maxnz = PetscMax(maxnz,procsnz[i]);
2942     }
2943     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2944 
2945     /* read in my part of the matrix column indices  */
2946     nz   = procsnz[0];
2947     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2948     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2949 
2950     /* read in everyone else's rows and ship them off */
2951     for (i=1; i<size; i++) {
2952       nz   = procsnz[i];
2953       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2954       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2955     }
2956     ierr = PetscFree(cols);CHKERRQ(ierr);
2957   } else {
2958     /* determine buffer space needed for message */
2959     nz = 0;
2960     for (i=0; i<m; i++) {
2961       nz += ourlens[i];
2962     }
2963     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2964 
2965     /* receive message of column indices */
2966     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2967   }
2968 
2969   /* determine column ownership if matrix is not square */
2970   if (N != M) {
2971     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2972     else n = newMat->cmap->n;
2973     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2974     cstart = cend - n;
2975   } else {
2976     cstart = rstart;
2977     cend   = rend;
2978     n      = cend - cstart;
2979   }
2980 
2981   /* loop over local rows, determining number of off diagonal entries */
2982   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2983   jj   = 0;
2984   for (i=0; i<m; i++) {
2985     for (j=0; j<ourlens[i]; j++) {
2986       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2987       jj++;
2988     }
2989   }
2990 
2991   for (i=0; i<m; i++) {
2992     ourlens[i] -= offlens[i];
2993   }
2994   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2995 
2996   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2997 
2998   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2999 
3000   for (i=0; i<m; i++) {
3001     ourlens[i] += offlens[i];
3002   }
3003 
3004   if (!rank) {
3005     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3006 
3007     /* read in my part of the matrix numerical values  */
3008     nz   = procsnz[0];
3009     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3010 
3011     /* insert into matrix */
3012     jj      = rstart;
3013     smycols = mycols;
3014     svals   = vals;
3015     for (i=0; i<m; i++) {
3016       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3017       smycols += ourlens[i];
3018       svals   += ourlens[i];
3019       jj++;
3020     }
3021 
3022     /* read in other processors and ship out */
3023     for (i=1; i<size; i++) {
3024       nz   = procsnz[i];
3025       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3026       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3027     }
3028     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3029   } else {
3030     /* receive numeric values */
3031     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3032 
3033     /* receive message of values */
3034     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3035 
3036     /* insert into matrix */
3037     jj      = rstart;
3038     smycols = mycols;
3039     svals   = vals;
3040     for (i=0; i<m; i++) {
3041       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3042       smycols += ourlens[i];
3043       svals   += ourlens[i];
3044       jj++;
3045     }
3046   }
3047   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3048   ierr = PetscFree(vals);CHKERRQ(ierr);
3049   ierr = PetscFree(mycols);CHKERRQ(ierr);
3050   ierr = PetscFree(rowners);CHKERRQ(ierr);
3051   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3058 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3059 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3060 {
3061   PetscErrorCode ierr;
3062   IS             iscol_local;
3063   PetscInt       csize;
3064 
3065   PetscFunctionBegin;
3066   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3067   if (call == MAT_REUSE_MATRIX) {
3068     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3069     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3070   } else {
3071     /* check if we are grabbing all columns */
3072     PetscBool    isstride;
3073     PetscMPIInt  lisstride = 0,gisstride;
3074     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3075     if (isstride) {
3076       PetscInt  start,len,mstart,mlen;
3077       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3078       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3079       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3080       if (mstart == start && mlen-mstart == len) lisstride = 1;
3081     }
3082     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3083     if (gisstride) {
3084       PetscInt N;
3085       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3086       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3087       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3088       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3089     } else {
3090       PetscInt cbs;
3091       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3092       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3093       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3094     }
3095   }
3096   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3097   if (call == MAT_INITIAL_MATRIX) {
3098     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3099     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3100   }
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3105 #undef __FUNCT__
3106 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3107 /*
3108     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3109   process, and then the end result obtained by concatenating the local matrices.
3110   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3111 
3112   Note: This requires a sequential iscol containing all of the requested column indices.
3113 */
3114 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3115 {
3116   PetscErrorCode ierr;
3117   PetscMPIInt    rank,size;
3118   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3119   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3120   PetscBool      allcolumns, colflag;
3121   Mat            M,Mreuse;
3122   MatScalar      *vwork,*aa;
3123   MPI_Comm       comm;
3124   Mat_SeqAIJ     *aij;
3125 
3126   PetscFunctionBegin;
3127   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3128   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3129   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3130 
3131   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3132   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3133   if (colflag && ncol == mat->cmap->N) {
3134     allcolumns = PETSC_TRUE;
3135     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3136   } else {
3137     allcolumns = PETSC_FALSE;
3138   }
3139   if (call ==  MAT_REUSE_MATRIX) {
3140     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3141     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3142     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3143   } else {
3144     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3145   }
3146 
3147   /*
3148       m - number of local rows
3149       n - number of columns (same on all processors)
3150       rstart - first row in new global matrix generated
3151   */
3152   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3153   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3154   if (call == MAT_INITIAL_MATRIX) {
3155     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3156     ii  = aij->i;
3157     jj  = aij->j;
3158 
3159     /*
3160         Determine the number of non-zeros in the diagonal and off-diagonal
3161         portions of the matrix in order to do correct preallocation
3162     */
3163 
3164     /* first get start and end of "diagonal" columns */
3165     if (csize == PETSC_DECIDE) {
3166       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3167       if (mglobal == n) { /* square matrix */
3168         nlocal = m;
3169       } else {
3170         nlocal = n/size + ((n % size) > rank);
3171       }
3172     } else {
3173       nlocal = csize;
3174     }
3175     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3176     rstart = rend - nlocal;
3177     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3178 
3179     /* next, compute all the lengths */
3180     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3181     olens = dlens + m;
3182     for (i=0; i<m; i++) {
3183       jend = ii[i+1] - ii[i];
3184       olen = 0;
3185       dlen = 0;
3186       for (j=0; j<jend; j++) {
3187         if (*jj < rstart || *jj >= rend) olen++;
3188         else dlen++;
3189         jj++;
3190       }
3191       olens[i] = olen;
3192       dlens[i] = dlen;
3193     }
3194     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3195     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3196     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3197     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3198     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3199     ierr = PetscFree(dlens);CHKERRQ(ierr);
3200   } else {
3201     PetscInt ml,nl;
3202 
3203     M    = *newmat;
3204     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3205     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3206     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3207     /*
3208          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3209        rather than the slower MatSetValues().
3210     */
3211     M->was_assembled = PETSC_TRUE;
3212     M->assembled     = PETSC_FALSE;
3213   }
3214   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3215   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3216   ii   = aij->i;
3217   jj   = aij->j;
3218   aa   = aij->a;
3219   for (i=0; i<m; i++) {
3220     row   = rstart + i;
3221     nz    = ii[i+1] - ii[i];
3222     cwork = jj;     jj += nz;
3223     vwork = aa;     aa += nz;
3224     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3225   }
3226 
3227   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3228   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3229   *newmat = M;
3230 
3231   /* save submatrix used in processor for next request */
3232   if (call ==  MAT_INITIAL_MATRIX) {
3233     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3234     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3235   }
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
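     /*
        Two passes over the local CSR data: first count, for each local row, how many
        column indices fall inside the diagonal block [cstart,cend) and how many fall
        outside, preallocate with those counts, then insert the rows with
        MatSetValues_MPIAIJ() and assemble.
     */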
3241 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3242 {
3243   PetscInt       m,cstart, cend,j,nnz,i,d;
3244   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3245   const PetscInt *JJ;
3246   PetscScalar    *values;
3247   PetscErrorCode ierr;
3248 
3249   PetscFunctionBegin;
3250   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3251 
3252   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3253   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3254   m      = B->rmap->n;
3255   cstart = B->cmap->rstart;
3256   cend   = B->cmap->rend;
3257   rstart = B->rmap->rstart;
3258 
3259   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3260 
3261 #if defined(PETSC_USE_DEBUG)
3262   for (i=0; i<m; i++) {
3263     nnz = Ii[i+1]- Ii[i];
3264     JJ  = J + Ii[i];
3265     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3266     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3267     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3268   }
3269 #endif
3270 
3271   for (i=0; i<m; i++) {
3272     nnz     = Ii[i+1]- Ii[i];
3273     JJ      = J + Ii[i];
3274     nnz_max = PetscMax(nnz_max,nnz);
3275     d       = 0;
3276     for (j=0; j<nnz; j++) {
3277       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3278     }
3279     d_nnz[i] = d;
3280     o_nnz[i] = nnz - d;
3281   }
3282   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3283   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3284 
3285   if (v) values = (PetscScalar*)v;
3286   else {
3287     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3288   }
3289 
3290   for (i=0; i<m; i++) {
3291     ii   = i + rstart;
3292     nnz  = Ii[i+1]- Ii[i];
3293     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3294   }
3295   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297 
3298   if (!v) {
3299     ierr = PetscFree(values);CHKERRQ(ierr);
3300   }
3301   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 #undef __FUNCT__
3306 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3307 /*@
3308    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3309    (the default parallel PETSc format).
3310 
3311    Collective on MPI_Comm
3312 
3313    Input Parameters:
3314 +  B - the matrix
3315 .  i - the indices into j for the start of each local row (starts with zero)
3316 .  j - the column indices for each local row (starts with zero)
3317 -  v - optional values in the matrix
3318 
3319    Level: developer
3320 
3321    Notes:
3322        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3323      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3324      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3325 
3326        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3327 
3328        The format used for the sparse matrix input is equivalent to a
3329     row-major ordering, i.e., for the following matrix, the expected
3330     input data is as shown:
3331 
3332 $        1 0 0
3333 $        2 0 3     P0
3334 $       -------
3335 $        4 5 6     P1
3336 $
3337 $     Process0 [P0]: rows_owned=[0,1]
3338 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3339 $        j =  {0,0,2}  [size = 3]
3340 $        v =  {1,2,3}  [size = 3]
3341 $
3342 $     Process1 [P1]: rows_owned=[2]
3343 $        i =  {0,3}    [size = nrow+1  = 1+1]
3344 $        j =  {0,1,2}  [size = 3]
3345 $        v =  {4,5,6}  [size = 3]
3346 
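        A minimal calling sketch (comm, the local row count m, the global sizes M,N,
      and the CSR arrays i,j,v are assumed to be set up by the caller as in the example above):
     
     .vb
          Mat B;
          MatCreate(comm,&B);
          MatSetSizes(B,m,PETSC_DECIDE,M,N);
          MatSetType(B,MATMPIAIJ);
          MatMPIAIJSetPreallocationCSR(B,i,j,v);
     .ve
     
        On return the values have been inserted and the matrix is assembled.
     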
3347 .keywords: matrix, aij, compressed row, sparse, parallel
3348 
3349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3350           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3351 @*/
3352 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3353 {
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 #undef __FUNCT__
3362 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3363 /*@C
3364    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3365    (the default parallel PETSc format).  For good matrix assembly performance
3366    the user should preallocate the matrix storage by setting the parameters
3367    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3368    performance can be increased by more than a factor of 50.
3369 
3370    Collective on MPI_Comm
3371 
3372    Input Parameters:
3373 +  B - the matrix
3374 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3375            (same value is used for all local rows)
3376 .  d_nnz - array containing the number of nonzeros in the various rows of the
3377            DIAGONAL portion of the local submatrix (possibly different for each row)
3378            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3379            The size of this array is equal to the number of local rows, i.e 'm'.
3380            For matrices that will be factored, you must leave room for (and set)
3381            the diagonal entry even if it is zero.
3382 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3383            submatrix (same value is used for all local rows).
3384 -  o_nnz - array containing the number of nonzeros in the various rows of the
3385            OFF-DIAGONAL portion of the local submatrix (possibly different for
3386            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3387            structure. The size of this array is equal to the number
3388            of local rows, i.e 'm'.
3389 
3390    If the *_nnz parameter is given then the *_nz parameter is ignored
3391 
3392    The AIJ format (also called the Yale sparse matrix format or
3393    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3394    storage.  The stored row and column indices begin with zero.
3395    See Users-Manual: ch_mat for details.
3396 
3397    The parallel matrix is partitioned such that the first m0 rows belong to
3398    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3399    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3400 
3401    The DIAGONAL portion of the local submatrix of a processor can be defined
3402    as the submatrix obtained by extracting the part corresponding to
3403    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3404    first row that belongs to the processor, r2 is the last row belonging to
3405    this processor, and c1-c2 is the range of indices of the local part of a
3406    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3407    common case of a square matrix, the row and column ranges are the same and
3408    the DIAGONAL part is also square. The remaining portion of the local
3409    submatrix (m x N) constitutes the OFF-DIAGONAL portion.
3410 
3411    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3412 
3413    You can call MatGetInfo() to get information on how effective the preallocation was;
3414    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3415    You can also run with the option -info and look for messages with the string
3416    malloc in them to see if additional memory allocation was needed.
3417 
3418    Example usage:
3419 
3420    Consider the following 8x8 matrix with 34 non-zero values, that is
3421    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3422    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3423    as follows:
3424 
3425 .vb
3426             1  2  0  |  0  3  0  |  0  4
3427     Proc0   0  5  6  |  7  0  0  |  8  0
3428             9  0 10  | 11  0  0  | 12  0
3429     -------------------------------------
3430            13  0 14  | 15 16 17  |  0  0
3431     Proc1   0 18  0  | 19 20 21  |  0  0
3432             0  0  0  | 22 23  0  | 24  0
3433     -------------------------------------
3434     Proc2  25 26 27  |  0  0 28  | 29  0
3435            30  0  0  | 31 32 33  |  0 34
3436 .ve
3437 
3438    This can be represented as a collection of submatrices as:
3439 
3440 .vb
3441       A B C
3442       D E F
3443       G H I
3444 .ve
3445 
3446    Where the submatrices A,B,C are owned by proc0, D,E,F are
3447    owned by proc1, G,H,I are owned by proc2.
3448 
3449    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3450    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3451    The 'M','N' parameters are 8,8, and have the same values on all procs.
3452 
3453    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3454    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3455    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3456    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3457    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3458    matrix, and [DF] as another SeqAIJ matrix.
3459 
3460    When d_nz, o_nz parameters are specified, d_nz storage elements are
3461    allocated for every row of the local diagonal submatrix, and o_nz
3462    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3463    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3464    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3465    In this case, the values of d_nz,o_nz are:
3466 .vb
3467      proc0 : d_nz = 2, o_nz = 2
3468      proc1 : d_nz = 3, o_nz = 2
3469      proc2 : d_nz = 1, o_nz = 4
3470 .ve
3471    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3472    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3473    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3474    34 values.
3475 
3476    When d_nnz, o_nnz parameters are specified, the storage is specified
3477    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3478    In the above case the values for d_nnz,o_nnz are:
3479 .vb
3480      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3481      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3482      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3483 .ve
3484    Here the space allocated is the sum of all the above values, i.e., 34, and
3485    hence the preallocation is perfect.
3486 
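        A minimal sketch of the recommended calling sequence (comm, the local sizes m,n,
      and the arrays d_nnz,o_nnz are assumed to be set up by the caller as above):
     
     .vb
          Mat A;
          MatCreate(comm,&A);
          MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
          MatSetType(A,MATMPIAIJ);
          MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
          /* then insert entries with MatSetValues() and assemble */
     .ve
     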
3487    Level: intermediate
3488 
3489 .keywords: matrix, aij, compressed row, sparse, parallel
3490 
3491 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3492           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3493 @*/
3494 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3495 {
3496   PetscErrorCode ierr;
3497 
3498   PetscFunctionBegin;
3499   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3500   PetscValidType(B,1);
3501   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 #undef __FUNCT__
3506 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3507 /*@
3508      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3509          CSR format the local rows.
3510 
3511    Collective on MPI_Comm
3512 
3513    Input Parameters:
3514 +  comm - MPI communicator
3515 .  m - number of local rows (Cannot be PETSC_DECIDE)
3516 .  n - This value should be the same as the local size used in creating the
3517        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3518        calculated if N is given). For square matrices n is almost always m.
3519 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3520 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3521 .   i - row indices
3522 .   j - column indices
3523 -   a - matrix values
3524 
3525    Output Parameter:
3526 .   mat - the matrix
3527 
3528    Level: intermediate
3529 
3530    Notes:
3531        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3532      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3533      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3534 
3535        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3536 
3537        The format used for the sparse matrix input is equivalent to a
3538     row-major ordering, i.e., for the following matrix, the expected
3539     input data is as shown:
3540 
3541 $        1 0 0
3542 $        2 0 3     P0
3543 $       -------
3544 $        4 5 6     P1
3545 $
3546 $     Process0 [P0]: rows_owned=[0,1]
3547 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3548 $        j =  {0,0,2}  [size = 3]
3549 $        v =  {1,2,3}  [size = 3]
3550 $
3551 $     Process1 [P1]: rows_owned=[2]
3552 $        i =  {0,3}    [size = nrow+1  = 1+1]
3553 $        j =  {0,1,2}  [size = 3]
3554 $        v =  {4,5,6}  [size = 3]
3555 
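        A minimal calling sketch (comm, the local sizes m,n, and the CSR arrays i,j,a
      are assumed to be set up by the caller as in the example above):
     
     .vb
          Mat A;
          MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
          /* the arrays are copied, so the caller may free or reuse i,j,a afterwards */
     .ve
     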
3556 .keywords: matrix, aij, compressed row, sparse, parallel
3557 
3558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3559           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3560 @*/
3561 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3562 {
3563   PetscErrorCode ierr;
3564 
3565   PetscFunctionBegin;
3566   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3567   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3568   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3569   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3570   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3571   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3572   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatCreateAIJ"
3578 /*@C
3579    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3580    (the default parallel PETSc format).  For good matrix assembly performance
3581    the user should preallocate the matrix storage by setting the parameters
3582    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3583    performance can be increased by more than a factor of 50.
3584 
3585    Collective on MPI_Comm
3586 
3587    Input Parameters:
3588 +  comm - MPI communicator
3589 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3590            This value should be the same as the local size used in creating the
3591            y vector for the matrix-vector product y = Ax.
3592 .  n - This value should be the same as the local size used in creating the
3593        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3594        calculated if N is given). For square matrices n is almost always m.
3595 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3596 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3597 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3598            (same value is used for all local rows)
3599 .  d_nnz - array containing the number of nonzeros in the various rows of the
3600            DIAGONAL portion of the local submatrix (possibly different for each row)
3601            or NULL, if d_nz is used to specify the nonzero structure.
3602            The size of this array is equal to the number of local rows, i.e 'm'.
3603 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3604            submatrix (same value is used for all local rows).
3605 -  o_nnz - array containing the number of nonzeros in the various rows of the
3606            OFF-DIAGONAL portion of the local submatrix (possibly different for
3607            each row) or NULL, if o_nz is used to specify the nonzero
3608            structure. The size of this array is equal to the number
3609            of local rows, i.e 'm'.
3610 
3611    Output Parameter:
3612 .  A - the matrix
3613 
3614    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3615    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
3616    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3617 
3618    Notes:
3619    If the *_nnz parameter is given then the *_nz parameter is ignored
3620 
3621    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3622    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3623    storage requirements for this matrix.
3624 
3625    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3626    processor then it must be used on all processors that share the object for
3627    that argument.
3628 
3629    The user MUST specify either the local or global matrix dimensions
3630    (possibly both).
3631 
3632    The parallel matrix is partitioned across processors such that the
3633    first m0 rows belong to process 0, the next m1 rows belong to
3634    process 1, the next m2 rows belong to process 2, etc., where
3635    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3636    values corresponding to an [m x N] submatrix.
3637 
3638    The columns are logically partitioned with the n0 columns belonging
3639    to the 0th partition, the next n1 columns belonging to the next
3640    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3641 
3642    The DIAGONAL portion of the local submatrix on any given processor
3643    is the submatrix formed by the rows and columns m,n owned by the
3644    given processor, i.e., the diagonal matrix on process 0 is
3645    [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3646    etc. The remaining [m x (N-n)] portion of the local submatrix
3647    constitutes the OFF-DIAGONAL portion. The example below
3648    illustrates this concept.
3649 
3650    For a square global matrix we define each processor's diagonal portion
3651    to be its local rows and the corresponding columns (a square submatrix);
3652    each processor's off-diagonal portion encompasses the remainder of the
3653    local matrix (a rectangular submatrix).
3654 
3655    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3656 
3657    When calling this routine with a single process communicator, a matrix of
3658    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3659    type of communicator, use the construction mechanism:
3660      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3661 
3662    By default, this format uses inodes (identical nodes) when possible.
3663    We search for consecutive rows with the same nonzero structure, thereby
3664    reusing matrix information to achieve increased efficiency.
3665 
3666    Options Database Keys:
3667 +  -mat_no_inode  - Do not use inodes
3668 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3669 -  -mat_aij_oneindex - Internally use indexing starting at 1
3670         rather than 0.  Note that when calling MatSetValues(),
3671         the user still MUST index entries starting at 0!
3672 
3673 
3674    Example usage:
3675 
3676    Consider the following 8x8 matrix with 34 non-zero values, that is
3677    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3678    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3679    as follows:
3680 
3681 .vb
3682             1  2  0  |  0  3  0  |  0  4
3683     Proc0   0  5  6  |  7  0  0  |  8  0
3684             9  0 10  | 11  0  0  | 12  0
3685     -------------------------------------
3686            13  0 14  | 15 16 17  |  0  0
3687     Proc1   0 18  0  | 19 20 21  |  0  0
3688             0  0  0  | 22 23  0  | 24  0
3689     -------------------------------------
3690     Proc2  25 26 27  |  0  0 28  | 29  0
3691            30  0  0  | 31 32 33  |  0 34
3692 .ve
3693 
3694    This can be represented as a collection of submatrices as:
3695 
3696 .vb
3697       A B C
3698       D E F
3699       G H I
3700 .ve
3701 
3702    Where the submatrices A,B,C are owned by proc0, D,E,F are
3703    owned by proc1, G,H,I are owned by proc2.
3704 
3705    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3706    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3707    The 'M','N' parameters are 8,8, and have the same values on all procs.
3708 
3709    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3710    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3711    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3712    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3713    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3714    matrix, and [DF] as another SeqAIJ matrix.
3715 
3716    When d_nz, o_nz parameters are specified, d_nz storage elements are
3717    allocated for every row of the local diagonal submatrix, and o_nz
3718    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3719    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3720    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3721    In this case, the values of d_nz,o_nz are:
3722 .vb
3723      proc0 : d_nz = 2, o_nz = 2
3724      proc1 : d_nz = 3, o_nz = 2
3725      proc2 : d_nz = 1, o_nz = 4
3726 .ve
3727    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3728    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3729    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3730    34 values.
3731 
3732    When d_nnz, o_nnz parameters are specified, the storage is specified
3733    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3734    In the above case the values for d_nnz,o_nnz are:
3735 .vb
3736      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3737      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3738      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3739 .ve
3740    Here the space allocated is the sum of all the above values, i.e., 34, and
3741    hence the preallocation is perfect.
3742 
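        A minimal sketch using the per-row counts from the example above (comm, the
      local sizes m,n, and the arrays d_nnz,o_nnz are assumed filled by the caller):
     
     .vb
          Mat A;
          MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
          /* then insert entries with MatSetValues() and assemble */
     .ve
     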
3743    Level: intermediate
3744 
3745 .keywords: matrix, aij, compressed row, sparse, parallel
3746 
3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3748           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3749 @*/
3750 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3751 {
3752   PetscErrorCode ierr;
3753   PetscMPIInt    size;
3754 
3755   PetscFunctionBegin;
3756   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3757   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3758   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3759   if (size > 1) {
3760     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3761     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3762   } else {
3763     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3764     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3765   }
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
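     /*
        MatMPIAIJGetSeqAIJ - returns the local diagonal block Ad, the local off-diagonal
        block Ao, and colmap, the array mapping Ao's compact column numbering to global
        column indices. Any output argument may be NULL if it is not needed.
     */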
3771 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3772 {
3773   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3774   PetscBool      flg;
3775   PetscErrorCode ierr;
3776 
3777   PetscFunctionBegin;
3778   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3779   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3780   if (Ad)     *Ad     = a->A;
3781   if (Ao)     *Ao     = a->B;
3782   if (colmap) *colmap = a->garray;
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
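     /*
        Forms a parallel MPIAIJ matrix by stacking the sequential matrices inmat (one
        per process in comm, each with n local columns) on top of one another.
        MAT_INITIAL_MATRIX triggers the symbolic (preallocation) phase; with
        MAT_REUSE_MATRIX only the numerical values are re-inserted.
     */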
3788 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3789 {
3790   PetscErrorCode ierr;
3791   PetscInt       m,N,i,rstart,nnz,Ii;
3792   PetscInt       *indx;
3793   PetscScalar    *values;
3794 
3795   PetscFunctionBegin;
3796   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3797   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3798     PetscInt       *dnz,*onz,sum,bs,cbs;
3799 
3800     if (n == PETSC_DECIDE) {
3801       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3802     }
3803     /* Check sum(n) = N */
3804     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3805     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3806 
3807     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3808     rstart -= m;
3809 
3810     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3811     for (i=0; i<m; i++) {
3812       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3813       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3814       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3815     }
3816 
3817     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3818     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3819     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3820     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3821     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3822     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3823     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3824   }
3825 
3826   /* numeric phase */
3827   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3828   for (i=0; i<m; i++) {
3829     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3830     Ii   = i + rstart;
3831     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3832     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3833   }
3834   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3835   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatFileSplit"
3841 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3842 {
3843   PetscErrorCode    ierr;
3844   PetscMPIInt       rank;
3845   PetscInt          m,N,i,rstart,nnz;
3846   size_t            len;
3847   const PetscInt    *indx;
3848   PetscViewer       out;
3849   char              *name;
3850   Mat               B;
3851   const PetscScalar *values;
3852 
3853   PetscFunctionBegin;
3854   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3855   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3856   /* Should this be the type of the diagonal block of A? */
3857   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3858   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3859   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3860   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3861   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3862   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3863   for (i=0; i<m; i++) {
3864     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3865     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3866     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3867   }
3868   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3869   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3870 
3871   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3872   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* extra room for the ".<rank>" suffix */
  ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
3875   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3876   ierr = PetscFree(name);CHKERRQ(ierr);
3877   ierr = MatView(B,out);CHKERRQ(ierr);
3878   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3879   ierr = MatDestroy(&B);CHKERRQ(ierr);
3880   PetscFunctionReturn(0);
3881 }
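
/*
   Illustrative usage sketch (an addition, not from the original source): each
   process appends its local rows, as a SeqAIJ matrix, to its own binary file,

     ierr = MatFileSplit(A,"Amat");CHKERRQ(ierr);

   producing the files Amat.0, Amat.1, ... (one per rank).
*/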
3882 
3883 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3886 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3887 {
3888   PetscErrorCode      ierr;
3889   Mat_Merge_SeqsToMPI *merge;
3890   PetscContainer      container;
3891 
3892   PetscFunctionBegin;
3893   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3894   if (container) {
3895     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3906     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3907     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3908     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3909     ierr = PetscFree(merge);CHKERRQ(ierr);
3910     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3911   }
3912   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3913   PetscFunctionReturn(0);
3914 }
3915 
3916 #include <../src/mat/utils/freespace.h>
3917 #include <petscbt.h>
3918 
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3921 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3922 {
3923   PetscErrorCode      ierr;
3924   MPI_Comm            comm;
3925   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3926   PetscMPIInt         size,rank,taga,*len_s;
3927   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3928   PetscInt            proc,m;
3929   PetscInt            **buf_ri,**buf_rj;
3930   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3931   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3932   MPI_Request         *s_waits,*r_waits;
3933   MPI_Status          *status;
3934   MatScalar           *aa=a->a;
3935   MatScalar           **abuf_r,*ba_i;
3936   Mat_Merge_SeqsToMPI *merge;
3937   PetscContainer      container;
3938 
3939   PetscFunctionBegin;
3940   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3941   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3942 
3943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3944   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3945 
3946   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3947   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3948 
3949   bi     = merge->bi;
3950   bj     = merge->bj;
3951   buf_ri = merge->buf_ri;
3952   buf_rj = merge->buf_rj;
3953 
3954   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3955   owners = merge->rowmap->range;
3956   len_s  = merge->len_s;
3957 
3958   /* send and recv matrix values */
3959   /*-----------------------------*/
3960   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3961   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3962 
3963   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3964   for (proc=0,k=0; proc<size; proc++) {
3965     if (!len_s[proc]) continue;
3966     i    = owners[proc];
3967     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3968     k++;
3969   }
3970 
3971   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3972   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3973   ierr = PetscFree(status);CHKERRQ(ierr);
3974 
3975   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3976   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3977 
3978   /* insert mat values of mpimat */
3979   /*----------------------------*/
3980   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3981   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3982 
3983   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3988   }
3989 
3990   /* set values of ba */
3991   m = merge->rowmap->n;
3992   for (i=0; i<m; i++) {
3993     arow = owners[rank] + i;
3994     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3995     bnzi = bi[i+1] - bi[i];
3996     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3997 
3998     /* add local non-zero vals of this proc's seqmat into ba */
3999     anzi   = ai[arow+1] - ai[arow];
4000     aj     = a->j + ai[arow];
4001     aa     = a->a + ai[arow];
4002     nextaj = 0;
4003     for (j=0; nextaj<anzi; j++) {
4004       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4005         ba_i[j] += aa[nextaj++];
4006       }
4007     }
4008 
4009     /* add received vals into ba */
4010     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4011       /* i-th row */
4012       if (i == *nextrow[k]) {
4013         anzi   = *(nextai[k]+1) - *nextai[k];
4014         aj     = buf_rj[k] + *(nextai[k]);
4015         aa     = abuf_r[k] + *(nextai[k]);
4016         nextaj = 0;
4017         for (j=0; nextaj<anzi; j++) {
4018           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4019             ba_i[j] += aa[nextaj++];
4020           }
4021         }
4022         nextrow[k]++; nextai[k]++;
4023       }
4024     }
4025     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4026   }
4027   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4028   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4029 
4030   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4031   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4032   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4033   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4034   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4035   PetscFunctionReturn(0);
4036 }
4037 
4038 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4039 
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4042 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4043 {
4044   PetscErrorCode      ierr;
4045   Mat                 B_mpi;
4046   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4047   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4048   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4049   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4050   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4051   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4052   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4053   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4054   MPI_Status          *status;
4055   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4056   PetscBT             lnkbt;
4057   Mat_Merge_SeqsToMPI *merge;
4058   PetscContainer      container;
4059 
4060   PetscFunctionBegin;
4061   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4062 
4063   /* make sure it is a PETSc comm */
4064   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4065   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4066   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4067 
4068   ierr = PetscNew(&merge);CHKERRQ(ierr);
4069   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4070 
4071   /* determine row ownership */
4072   /*---------------------------------------------------------*/
4073   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4074   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4075   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4076   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4077   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4078   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4079   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4080 
4081   m      = merge->rowmap->n;
4082   owners = merge->rowmap->range;
4083 
4084   /* determine the number of messages to send, their lengths */
4085   /*---------------------------------------------------------*/
4086   len_s = merge->len_s;
4087 
4088   len          = 0; /* length of buf_si[] */
4089   merge->nsend = 0;
4090   for (proc=0; proc<size; proc++) {
4091     len_si[proc] = 0;
4092     if (proc == rank) {
4093       len_s[proc] = 0;
4094     } else {
4095       len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4097     }
4098     if (len_s[proc]) {
4099       merge->nsend++;
4100       nrows = 0;
4101       for (i=owners[proc]; i<owners[proc+1]; i++) {
4102         if (ai[i+1] > ai[i]) nrows++;
4103       }
4104       len_si[proc] = 2*(nrows+1);
4105       len         += len_si[proc];
4106     }
4107   }
4108 
4109   /* determine the number and length of messages to receive for ij-structure */
4110   /*-------------------------------------------------------------------------*/
4111   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4112   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4113 
4114   /* post the Irecv of j-structure */
4115   /*-------------------------------*/
4116   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4117   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4118 
4119   /* post the Isend of j-structure */
4120   /*--------------------------------*/
4121   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4122 
4123   for (proc=0, k=0; proc<size; proc++) {
4124     if (!len_s[proc]) continue;
4125     i    = owners[proc];
4126     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4127     k++;
4128   }
4129 
4130   /* receives and sends of j-structure are complete */
4131   /*------------------------------------------------*/
4132   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4133   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4134 
4135   /* send and recv i-structure */
4136   /*---------------------------*/
4137   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4138   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4139 
4140   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4141   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4142   for (proc=0,k=0; proc<size; proc++) {
4143     if (!len_s[proc]) continue;
4144     /* form outgoing message for i-structure:
4145          buf_si[0]:                 nrows to be sent
4146                [1:nrows]:           row index (global)
4147                [nrows+1:2*nrows+1]: i-structure index
4148     */
4149     /*-------------------------------------------*/
4150     nrows       = len_si[proc]/2 - 1;
4151     buf_si_i    = buf_si + nrows+1;
4152     buf_si[0]   = nrows;
4153     buf_si_i[0] = 0;
4154     nrows       = 0;
4155     for (i=owners[proc]; i<owners[proc+1]; i++) {
4156       anzi = ai[i+1] - ai[i];
4157       if (anzi) {
4158         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4159         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4160         nrows++;
4161       }
4162     }
4163     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4164     k++;
4165     buf_si += len_si[proc];
4166   }
4167 
4168   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4169   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4170 
4171   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4172   for (i=0; i<merge->nrecv; i++) {
4173     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4174   }
4175 
4176   ierr = PetscFree(len_si);CHKERRQ(ierr);
4177   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4178   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4179   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4180   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4181   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4182   ierr = PetscFree(status);CHKERRQ(ierr);
4183 
4184   /* compute a local seq matrix in each processor */
4185   /*----------------------------------------------*/
4186   /* allocate bi array and free space for accumulating nonzero column info */
4187   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4188   bi[0] = 0;
4189 
4190   /* create and initialize a linked list */
4191   nlnk = N+1;
4192   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4193 
4194   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4195   len  = ai[owners[rank+1]] - ai[owners[rank]];
4196   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4197 
4198   current_space = free_space;
4199 
4200   /* determine symbolic info for each local row */
4201   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4202 
4203   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4208   }
4209 
4210   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4211   len  = 0;
4212   for (i=0; i<m; i++) {
4213     bnzi = 0;
4214     /* add local non-zero cols of this proc's seqmat into lnk */
4215     arow  = owners[rank] + i;
4216     anzi  = ai[arow+1] - ai[arow];
4217     aj    = a->j + ai[arow];
4218     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4219     bnzi += nlnk;
4220     /* add received col data into lnk */
4221     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4222       if (i == *nextrow[k]) { /* i-th row */
4223         anzi  = *(nextai[k]+1) - *nextai[k];
4224         aj    = buf_rj[k] + *nextai[k];
4225         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4226         bnzi += nlnk;
4227         nextrow[k]++; nextai[k]++;
4228       }
4229     }
4230     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4231 
4232     /* if free space is not available, make more free space */
4233     if (current_space->local_remaining<bnzi) {
4234       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4235       nspacedouble++;
4236     }
4237     /* copy data into free space, then initialize lnk */
4238     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4239     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4240 
4241     current_space->array           += bnzi;
4242     current_space->local_used      += bnzi;
4243     current_space->local_remaining -= bnzi;
4244 
4245     bi[i+1] = bi[i] + bnzi;
4246   }
4247 
4248   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4249 
4250   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4251   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4252   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4253 
4254   /* create symbolic parallel matrix B_mpi */
4255   /*---------------------------------------*/
4256   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4257   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4258   if (n==PETSC_DECIDE) {
4259     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4260   } else {
4261     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4262   }
4263   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4264   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4265   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4266   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4267   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4268 
4269   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4270   B_mpi->assembled    = PETSC_FALSE;
4271   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4272   merge->bi           = bi;
4273   merge->bj           = bj;
4274   merge->buf_ri       = buf_ri;
4275   merge->buf_rj       = buf_rj;
4276   merge->coi          = NULL;
4277   merge->coj          = NULL;
4278   merge->owners_co    = NULL;
4279 
4280   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4281 
4282   /* attach the supporting struct to B_mpi for reuse */
4283   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4284   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4285   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4286   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4287   *mpimat = B_mpi;
4288 
4289   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4290   PetscFunctionReturn(0);
4291 }
4292 
4293 #undef __FUNCT__
4294 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4295 /*@C
4296       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4297                  matrices from each processor
4298 
4299     Collective on MPI_Comm
4300 
4301    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4304 .    m - number of local rows (or PETSC_DECIDE)
4305 .    n - number of local columns (or PETSC_DECIDE)
4306 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4307 
4308    Output Parameter:
4309 .    mpimat - the parallel matrix generated
4310 
4311     Level: advanced
4312 
4313    Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
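
   A minimal usage sketch (illustrative; seqmat is assumed to have identical
   global dimensions on every process):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the numerical values of seqmat, keeping its nonzero pattern, then ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve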
4317 @*/
4318 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4319 {
4320   PetscErrorCode ierr;
4321   PetscMPIInt    size;
4322 
4323   PetscFunctionBegin;
4324   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4325   if (size == 1) {
4326     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4327     if (scall == MAT_INITIAL_MATRIX) {
4328       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4329     } else {
4330       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4331     }
4332     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4333     PetscFunctionReturn(0);
4334   }
4335   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4336   if (scall == MAT_INITIAL_MATRIX) {
4337     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4338   }
4339   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4340   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4341   PetscFunctionReturn(0);
4342 }
4343 
4344 #undef __FUNCT__
4345 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4346 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4350 
4351     Not Collective
4352 
4353    Input Parameters:
4354 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4356 
4357    Output Parameter:
4358 .    A_loc - the local sequential matrix generated
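
   A minimal usage sketch (illustrative):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc; after the values of A change, refresh it with ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve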
4359 
4360     Level: developer
4361 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4363 
4364 @*/
4365 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4366 {
4367   PetscErrorCode ierr;
4368   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4369   Mat_SeqAIJ     *mat,*a,*b;
4370   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4371   MatScalar      *aa,*ba,*cam;
4372   PetscScalar    *ca;
4373   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4374   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4375   PetscBool      match;
4376   MPI_Comm       comm;
4377   PetscMPIInt    size;
4378 
4379   PetscFunctionBegin;
4380   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4381   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4382   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4383   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4384   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4385 
4386   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4387   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4388   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4389   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4390   aa = a->a; ba = b->a;
4391   if (scall == MAT_INITIAL_MATRIX) {
4392     if (size == 1) {
4393       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4394       PetscFunctionReturn(0);
4395     }
4396 
4397     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4398     ci[0] = 0;
4399     for (i=0; i<am; i++) {
4400       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4401     }
4402     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4403     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4404     k    = 0;
4405     for (i=0; i<am; i++) {
4406       ncols_o = bi[i+1] - bi[i];
4407       ncols_d = ai[i+1] - ai[i];
4408       /* off-diagonal portion of A */
4409       for (jo=0; jo<ncols_o; jo++) {
4410         col = cmap[*bj];
4411         if (col >= cstart) break;
4412         cj[k]   = col; bj++;
4413         ca[k++] = *ba++;
4414       }
4415       /* diagonal portion of A */
4416       for (j=0; j<ncols_d; j++) {
4417         cj[k]   = cstart + *aj++;
4418         ca[k++] = *aa++;
4419       }
4420       /* off-diagonal portion of A */
4421       for (j=jo; j<ncols_o; j++) {
4422         cj[k]   = cmap[*bj++];
4423         ca[k++] = *ba++;
4424       }
4425     }
4426     /* put together the new matrix */
4427     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4428     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4429     /* Since these are PETSc arrays, change flags to free them as necessary. */
4430     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4431     mat->free_a  = PETSC_TRUE;
4432     mat->free_ij = PETSC_TRUE;
4433     mat->nonew   = 0;
4434   } else if (scall == MAT_REUSE_MATRIX) {
4435     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4436     ci = mat->i; cj = mat->j; cam = mat->a;
4437     for (i=0; i<am; i++) {
4438       /* off-diagonal portion of A */
4439       ncols_o = bi[i+1] - bi[i];
4440       for (jo=0; jo<ncols_o; jo++) {
4441         col = cmap[*bj];
4442         if (col >= cstart) break;
4443         *cam++ = *ba++; bj++;
4444       }
4445       /* diagonal portion of A */
4446       ncols_d = ai[i+1] - ai[i];
4447       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4448       /* off-diagonal portion of A */
4449       for (j=jo; j<ncols_o; j++) {
4450         *cam++ = *ba++; bj++;
4451       }
4452     }
4453   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4454   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4455   PetscFunctionReturn(0);
4456 }
4457 
4458 #undef __FUNCT__
4459 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4460 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4462 
4463     Not Collective
4464 
4465    Input Parameters:
4466 +    A - the matrix
4467 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4468 -    row, col - index sets of rows and columns to extract (or NULL)
4469 
4470    Output Parameter:
4471 .    A_loc - the local sequential matrix generated
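
   A minimal usage sketch (illustrative; passing NULL for row and col selects
   all local rows and the nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve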
4472 
4473     Level: developer
4474 
4475 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4476 
4477 @*/
4478 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4479 {
4480   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4481   PetscErrorCode ierr;
4482   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4483   IS             isrowa,iscola;
4484   Mat            *aloc;
4485   PetscBool      match;
4486 
4487   PetscFunctionBegin;
4488   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4489   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4490   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4491   if (!row) {
4492     start = A->rmap->rstart; end = A->rmap->rend;
4493     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4494   } else {
4495     isrowa = *row;
4496   }
4497   if (!col) {
4498     start = A->cmap->rstart;
4499     cmap  = a->garray;
4500     nzA   = a->A->cmap->n;
4501     nzB   = a->B->cmap->n;
4502     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4503     ncols = 0;
4504     for (i=0; i<nzB; i++) {
4505       if (cmap[i] < start) idx[ncols++] = cmap[i];
4506       else break;
4507     }
4508     imark = i;
4509     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4510     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4511     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4512   } else {
4513     iscola = *col;
4514   }
4515   if (scall != MAT_INITIAL_MATRIX) {
4516     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4517     aloc[0] = *A_loc;
4518   }
4519   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4520   *A_loc = aloc[0];
4521   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4522   if (!row) {
4523     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4524   }
4525   if (!col) {
4526     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4527   }
4528   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 #undef __FUNCT__
4533 #define __FUNCT__ "MatGetBrowsOfAcols"
4534 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4536 
4537     Collective on Mat
4538 
4539    Input Parameters:
4540 +    A,B - the matrices in mpiaij format
4541 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4542 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4543 
4544    Output Parameter:
4545 +    rowb, colb - index sets of rows and columns of B to extract
4546 -    B_seq - the sequential matrix generated
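
   A minimal usage sketch (illustrative; with NULL index sets the required rows
   and columns are determined and the index sets destroyed internally):
.vb
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,NULL,NULL,&B_seq);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve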
4547 
4548     Level: developer
4549 
4550 @*/
4551 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4552 {
4553   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4554   PetscErrorCode ierr;
4555   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4556   IS             isrowb,iscolb;
4557   Mat            *bseq=NULL;
4558 
4559   PetscFunctionBegin;
4560   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4561     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4562   }
4563   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4564 
4565   if (scall == MAT_INITIAL_MATRIX) {
4566     start = A->cmap->rstart;
4567     cmap  = a->garray;
4568     nzA   = a->A->cmap->n;
4569     nzB   = a->B->cmap->n;
4570     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4571     ncols = 0;
4572     for (i=0; i<nzB; i++) {  /* row < local row index */
4573       if (cmap[i] < start) idx[ncols++] = cmap[i];
4574       else break;
4575     }
4576     imark = i;
4577     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4578     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4579     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4580     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4581   } else {
4582     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4583     isrowb  = *rowb; iscolb = *colb;
4584     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4585     bseq[0] = *B_seq;
4586   }
4587   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4588   *B_seq = bseq[0];
4589   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4590   if (!rowb) {
4591     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4592   } else {
4593     *rowb = isrowb;
4594   }
4595   if (!colb) {
4596     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4597   } else {
4598     *colb = iscolb;
4599   }
4600   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4601   PetscFunctionReturn(0);
4602 }
4603 
4604 #undef __FUNCT__
4605 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4606 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
4609 
4610     Collective on Mat
4611 
4612    Input Parameters:
4613 +    A,B - the matrices in mpiaij format
4614 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4615 
4616    Output Parameter:
4617 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4618 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4619 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4620 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
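
   Illustrative call sequence (a sketch): the arrays startsj_s, startsj_r, and
   bufa are created on the first call and reused on subsequent calls.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);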
4621 
4622     Level: developer
4623 
4624 */
4625 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4626 {
4627   VecScatter_MPI_General *gen_to,*gen_from;
4628   PetscErrorCode         ierr;
4629   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4630   Mat_SeqAIJ             *b_oth;
4631   VecScatter             ctx =a->Mvctx;
4632   MPI_Comm               comm;
4633   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4634   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4635   PetscScalar            *rvalues,*svalues;
4636   MatScalar              *b_otha,*bufa,*bufA;
4637   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4638   MPI_Request            *rwaits = NULL,*swaits = NULL;
4639   MPI_Status             *sstatus,rstatus;
4640   PetscMPIInt            jj,size;
4641   PetscInt               *cols,sbs,rbs;
4642   PetscScalar            *vals;
4643 
4644   PetscFunctionBegin;
4645   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4646   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4647 
4648   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4649     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4650   }
4651   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4652   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4653 
4654   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4655   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4656   rvalues  = gen_from->values; /* holds the length of receiving row */
4657   svalues  = gen_to->values;   /* holds the length of sending row */
4658   nrecvs   = gen_from->n;
4659   nsends   = gen_to->n;
4660 
4661   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4662   srow    = gen_to->indices;    /* local row index to be sent */
4663   sstarts = gen_to->starts;
4664   sprocs  = gen_to->procs;
4665   sstatus = gen_to->sstatus;
4666   sbs     = gen_to->bs;
4667   rstarts = gen_from->starts;
4668   rprocs  = gen_from->procs;
4669   rbs     = gen_from->bs;
4670 
4671   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4672   if (scall == MAT_INITIAL_MATRIX) {
4673     /* i-array */
4674     /*---------*/
4675     /*  post receives */
4676     for (i=0; i<nrecvs; i++) {
4677       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4678       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4679       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4680     }
4681 
4682     /* pack the outgoing message */
4683     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4684 
4685     sstartsj[0] = 0;
4686     rstartsj[0] = 0;
4687     len         = 0; /* total length of j or a array to be sent */
4688     k           = 0;
4689     for (i=0; i<nsends; i++) {
4690       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4691       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4692       for (j=0; j<nrows; j++) {
4693         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4694         for (l=0; l<sbs; l++) {
4695           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4696 
4697           rowlen[j*sbs+l] = ncols;
4698 
4699           len += ncols;
4700           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4701         }
4702         k++;
4703       }
4704       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4705 
4706       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4707     }
4708     /* recvs and sends of i-array are completed */
4709     i = nrecvs;
4710     while (i--) {
4711       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4712     }
4713     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4714 
4715     /* allocate buffers for sending j and a arrays */
4716     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4717     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4718 
4719     /* create i-array of B_oth */
4720     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4721 
4722     b_othi[0] = 0;
4723     len       = 0; /* total length of j or a array to be received */
4724     k         = 0;
4725     for (i=0; i<nrecvs; i++) {
4726       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4727       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4728       for (j=0; j<nrows; j++) {
4729         b_othi[k+1] = b_othi[k] + rowlen[j];
4730         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4731         k++;
4732       }
4733       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4734     }
4735 
    /* allocate space for j and a arrays of B_oth */
4737     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4738     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4739 
4740     /* j-array */
4741     /*---------*/
4742     /*  post receives of j-array */
4743     for (i=0; i<nrecvs; i++) {
4744       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4745       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4746     }
4747 
4748     /* pack the outgoing message j-array */
4749     k = 0;
4750     for (i=0; i<nsends; i++) {
4751       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4752       bufJ  = bufj+sstartsj[i];
4753       for (j=0; j<nrows; j++) {
4754         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4755         for (ll=0; ll<sbs; ll++) {
4756           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4757           for (l=0; l<ncols; l++) {
4758             *bufJ++ = cols[l];
4759           }
4760           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4761         }
4762       }
4763       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4764     }
4765 
4766     /* recvs and sends of j-array are completed */
4767     i = nrecvs;
4768     while (i--) {
4769       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4770     }
4771     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4772   } else if (scall == MAT_REUSE_MATRIX) {
4773     sstartsj = *startsj_s;
4774     rstartsj = *startsj_r;
4775     bufa     = *bufa_ptr;
4776     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4777     b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix B does not possess an object container");
4779 
4780   /* a-array */
4781   /*---------*/
4782   /*  post receives of a-array */
4783   for (i=0; i<nrecvs; i++) {
4784     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4785     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4786   }
4787 
4788   /* pack the outgoing message a-array */
4789   k = 0;
4790   for (i=0; i<nsends; i++) {
4791     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4792     bufA  = bufa+sstartsj[i];
4793     for (j=0; j<nrows; j++) {
4794       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4795       for (ll=0; ll<sbs; ll++) {
4796         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4797         for (l=0; l<ncols; l++) {
4798           *bufA++ = vals[l];
4799         }
4800         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4801       }
4802     }
4803     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4804   }
4805   /* recvs and sends of a-array are completed */
4806   i = nrecvs;
4807   while (i--) {
4808     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4809   }
4810   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4811   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4812 
4813   if (scall == MAT_INITIAL_MATRIX) {
4814     /* put together the new matrix */
4815     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4816 
4817     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4818     /* Since these are PETSc arrays, change flags to free them as necessary. */
4819     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4820     b_oth->free_a  = PETSC_TRUE;
4821     b_oth->free_ij = PETSC_TRUE;
4822     b_oth->nonew   = 0;
4823 
4824     ierr = PetscFree(bufj);CHKERRQ(ierr);
4825     if (!startsj_s || !bufa_ptr) {
4826       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
4828     } else {
4829       *startsj_s = sstartsj;
4830       *startsj_r = rstartsj;
4831       *bufa_ptr  = bufa;
4832     }
4833   }
4834   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4835   PetscFunctionReturn(0);
4836 }
4837 
4838 #undef __FUNCT__
4839 #define __FUNCT__ "MatGetCommunicationStructs"
4840 /*@C
4841   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4842 
4843   Not Collective
4844 
  Input Parameter:
4846 . A - The matrix in mpiaij format
4847 
  Output Parameters:
4849 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4850 . colmap - A map from global column index to local index into lvec
4851 - multScatter - A scatter from the argument of a matrix-vector product to lvec
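
  A minimal access sketch (illustrative; the type of colmap depends on whether
  PETSc was configured with ctable support):
.vb
    Vec        lvec;
    VecScatter sct;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
.ve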
4852 
4853   Level: developer
4854 
4855 @*/
4856 #if defined(PETSC_USE_CTABLE)
4857 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4858 #else
4859 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4860 #endif
4861 {
4862   Mat_MPIAIJ *a;
4863 
4864   PetscFunctionBegin;
4865   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4866   PetscValidPointer(lvec, 2);
4867   PetscValidPointer(colmap, 3);
4868   PetscValidPointer(multScatter, 4);
4869   a = (Mat_MPIAIJ*) A->data;
4870   if (lvec) *lvec = a->lvec;
4871   if (colmap) *colmap = a->colmap;
4872   if (multScatter) *multScatter = a->Mvctx;
4873   PetscFunctionReturn(0);
4874 }
4875 
4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4877 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4878 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4879 #if defined(PETSC_HAVE_ELEMENTAL)
4880 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4881 #endif
4882 #if defined(PETSC_HAVE_HYPRE)
4883 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4884 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
4885 #endif
4886 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
4887 
4888 #undef __FUNCT__
4889 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4890 /*
    Computes (B'*A')' since computing A*B directly is untenable
4892 
4893                n                       p                          p
4894         (              )       (              )         (                  )
4895       m (      A       )  *  n (       B      )   =   m (         C        )
4896         (              )       (              )         (                  )
4897 
4898 */
4899 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4900 {
4901   PetscErrorCode ierr;
4902   Mat            At,Bt,Ct;
4903 
4904   PetscFunctionBegin;
4905   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4906   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4907   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4908   ierr = MatDestroy(&At);CHKERRQ(ierr);
4909   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4910   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4911   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4912   PetscFunctionReturn(0);
4913 }
4914 
4915 #undef __FUNCT__
4916 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4917 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4918 {
4919   PetscErrorCode ierr;
4920   PetscInt       m=A->rmap->n,n=B->cmap->n;
4921   Mat            Cmat;
4922 
4923   PetscFunctionBegin;
4924   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4925   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4926   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4927   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4928   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4929   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4930   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4931   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4932 
4933   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4934 
4935   *C = Cmat;
4936   PetscFunctionReturn(0);
4937 }
4938 
4939 /* ----------------------------------------------------------------*/
4940 #undef __FUNCT__
4941 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4942 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4943 {
4944   PetscErrorCode ierr;
4945 
4946   PetscFunctionBegin;
4947   if (scall == MAT_INITIAL_MATRIX) {
4948     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4949     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4950     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4951   }
4952   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4953   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4954   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4955   PetscFunctionReturn(0);
4956 }
4957 
4958 /*MC
4959    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4960 
4961    Options Database Keys:
4962 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
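
   An illustrative creation sequence (a sketch; m, n, M, N and the preallocation
   figures are placeholders):
.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve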
4963 
4964   Level: beginner
4965 
4966 .seealso: MatCreateAIJ()
4967 M*/
4968 
4969 #undef __FUNCT__
4970 #define __FUNCT__ "MatCreate_MPIAIJ"
4971 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4972 {
4973   Mat_MPIAIJ     *b;
4974   PetscErrorCode ierr;
4975   PetscMPIInt    size;
4976 
4977   PetscFunctionBegin;
4978   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4979 
4980   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4981   B->data       = (void*)b;
4982   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4983   B->assembled  = PETSC_FALSE;
4984   B->insertmode = NOT_SET_VALUES;
4985   b->size       = size;
4986 
4987   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4988 
4989   /* build cache for off array entries formed */
4990   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4991 
4992   b->donotstash  = PETSC_FALSE;
4993   b->colmap      = 0;
4994   b->garray      = 0;
4995   b->roworiented = PETSC_TRUE;
4996 
4997   /* stuff used for matrix vector multiply */
4998   b->lvec  = NULL;
4999   b->Mvctx = NULL;
5000 
5001   /* stuff for MatGetRow() */
5002   b->rowindices   = 0;
5003   b->rowvalues    = 0;
5004   b->getrowactive = PETSC_FALSE;
5005 
5006   /* flexible pointer used in CUSP/CUSPARSE classes */
5007   b->spptr = NULL;
5008 
5009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5019 #if defined(PETSC_HAVE_ELEMENTAL)
5020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5021 #endif
5022 #if defined(PETSC_HAVE_HYPRE)
5023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5024 #endif
5025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5029 #if defined(PETSC_HAVE_HYPRE)
5030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5031 #endif
5032   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 
5036 #undef __FUNCT__
5037 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5038 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5040          and "off-diagonal" part of the matrix in CSR format.
5041 
5042    Collective on MPI_Comm
5043 
5044    Input Parameters:
5045 +  comm - MPI communicator
5046 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5052 .   i - row indices for "diagonal" portion of matrix
5053 .   j - column indices
5054 .   a - matrix values
5055 .   oi - row indices for "off-diagonal" portion of matrix
5056 .   oj - column indices
5057 -   oa - matrix values
5058 
5059    Output Parameter:
5060 .   mat - the matrix
5061 
5062    Level: advanced
5063 
5064    Notes:
5065        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5066        must free the arrays once the matrix has been destroyed and not before.
5067 
       The i and j indices are 0-based
5069 
5070        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5071 
5072        This sets local rows and cannot be used to set off-processor values.
5073 
5074        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5075        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5076        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5077        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5078        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5079        communication if it is known that only local entries will be set.
5080 
5081 .keywords: matrix, aij, compressed row, sparse, parallel
5082 
5083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5084           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5085 @*/
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Wrap the user arrays as the local "diagonal" (A) and "off-diagonal" (B) blocks; the arrays are not copied */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version of MatSetValues() for MPIAIJ matrices, intended for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Redefine these macros so they can be used in a void function (which cannot return an error code) */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   MatDisAssemble_MPIAIJ() replaced aij->B, so all cached pointers must be refreshed first */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-processor row: stash the values for communication during assembly */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
