xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 517e223e559585e817ecab9cb2b8dc730049da52)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine number diagonal and off-diagonal counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine number diagonal and off-diagonal counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it it is not scalable (each processor
402 has an order N integer array but is fast to acess.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processors are either in INSERTMODE or ADDMODE */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled, if so we must
742      also disassemble ourselfs, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of off process part of matrix zeroing removed columns*/
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added in yy until the next line, */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: symmetric diagonal block */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* th processor needs space a largest processor needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Everyone has to call to draw the matrix since the graphics waits are
1444        synchronized across all processors that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatPermute_MPIAIJ"
1578 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1579 {
1580   Mat            aA,aB,Aperm;
1581   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1582   PetscScalar    *aa,*ba;
1583   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1584   PetscSF        rowsf,sf;
1585   IS             parcolp = NULL;
1586   PetscBool      done;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1591   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1592   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1593   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1594 
1595   /* Invert row permutation to find out where my rows should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1599   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1600   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1602 
1603   /* Invert column permutation to find out where my columns should go */
1604   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1605   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1606   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1607   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1608   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1613   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1614   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1615 
1616   /* Find out where my gcols should go */
1617   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1618   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1620   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1621   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1622   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1624   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1625 
1626   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   for (i=0; i<m; i++) {
1630     PetscInt row = rdest[i],rowner;
1631     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       PetscInt cowner,col = cdest[aj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638     for (j=bi[i]; j<bi[i+1]; j++) {
1639       PetscInt cowner,col = gcdest[bj[j]];
1640       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1641       if (rowner == cowner) dnnz[i]++;
1642       else onnz[i]++;
1643     }
1644   }
1645   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1650 
1651   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1653   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) {
1655     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1656     PetscInt j0,rowlen;
1657     rowlen = ai[i+1] - ai[i];
1658     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662     rowlen = bi[i+1] - bi[i];
1663     for (j0=j=0; j<rowlen; j0=j) {
1664       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1665       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1666     }
1667   }
1668   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1671   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1673   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1674   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1675   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1676   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1677   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1678   *B = Aperm;
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1684 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1685 {
1686   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1691   if (ghosts) *ghosts = aij->garray;
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1697 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1698 {
1699   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1700   Mat            A    = mat->A,B = mat->B;
1701   PetscErrorCode ierr;
1702   PetscReal      isend[5],irecv[5];
1703 
1704   PetscFunctionBegin;
1705   info->block_size = 1.0;
1706   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1709   isend[3] = info->memory;  isend[4] = info->mallocs;
1710 
1711   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1712 
1713   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1714   isend[3] += info->memory;  isend[4] += info->mallocs;
1715   if (flag == MAT_LOCAL) {
1716     info->nz_used      = isend[0];
1717     info->nz_allocated = isend[1];
1718     info->nz_unneeded  = isend[2];
1719     info->memory       = isend[3];
1720     info->mallocs      = isend[4];
1721   } else if (flag == MAT_GLOBAL_MAX) {
1722     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1723 
1724     info->nz_used      = irecv[0];
1725     info->nz_allocated = irecv[1];
1726     info->nz_unneeded  = irecv[2];
1727     info->memory       = irecv[3];
1728     info->mallocs      = irecv[4];
1729   } else if (flag == MAT_GLOBAL_SUM) {
1730     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1731 
1732     info->nz_used      = irecv[0];
1733     info->nz_allocated = irecv[1];
1734     info->nz_unneeded  = irecv[2];
1735     info->memory       = irecv[3];
1736     info->mallocs      = irecv[4];
1737   }
1738   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1739   info->fill_ratio_needed = 0;
1740   info->factor_mallocs    = 0;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetOption_MPIAIJ"
1746 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1747 {
1748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   switch (op) {
1753   case MAT_NEW_NONZERO_LOCATIONS:
1754   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1755   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1756   case MAT_KEEP_NONZERO_PATTERN:
1757   case MAT_NEW_NONZERO_LOCATION_ERR:
1758   case MAT_USE_INODES:
1759   case MAT_IGNORE_ZERO_ENTRIES:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1763     break;
1764   case MAT_ROW_ORIENTED:
1765     a->roworiented = flg;
1766 
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_NEW_DIAGONALS:
1771     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1772     break;
1773   case MAT_IGNORE_OFF_PROC_ENTRIES:
1774     a->donotstash = flg;
1775     break;
1776   case MAT_SPD:
1777     A->spd_set = PETSC_TRUE;
1778     A->spd     = flg;
1779     if (flg) {
1780       A->symmetric                  = PETSC_TRUE;
1781       A->structurally_symmetric     = PETSC_TRUE;
1782       A->symmetric_set              = PETSC_TRUE;
1783       A->structurally_symmetric_set = PETSC_TRUE;
1784     }
1785     break;
1786   case MAT_SYMMETRIC:
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_STRUCTURALLY_SYMMETRIC:
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_HERMITIAN:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_SYMMETRY_ETERNAL:
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     break;
1798   default:
1799     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatGetRow_MPIAIJ"
1806 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1807 {
1808   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1809   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1810   PetscErrorCode ierr;
1811   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1812   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1813   PetscInt       *cmap,*idx_p;
1814 
1815   PetscFunctionBegin;
1816   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1817   mat->getrowactive = PETSC_TRUE;
1818 
1819   if (!mat->rowvalues && (idx || v)) {
1820     /*
1821         allocate enough space to hold information from the longest row.
1822     */
1823     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1824     PetscInt   max = 1,tmp;
1825     for (i=0; i<matin->rmap->n; i++) {
1826       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1827       if (max < tmp) max = tmp;
1828     }
1829     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1830   }
1831 
1832   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1833   lrow = row - rstart;
1834 
1835   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1836   if (!v)   {pvA = 0; pvB = 0;}
1837   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1838   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   nztot = nzA + nzB;
1841 
1842   cmap = mat->garray;
1843   if (v  || idx) {
1844     if (nztot) {
1845       /* Sort by increasing column numbers, assuming A and B already sorted */
1846       PetscInt imark = -1;
1847       if (v) {
1848         *v = v_p = mat->rowvalues;
1849         for (i=0; i<nzB; i++) {
1850           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1851           else break;
1852         }
1853         imark = i;
1854         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1855         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1856       }
1857       if (idx) {
1858         *idx = idx_p = mat->rowindices;
1859         if (imark > -1) {
1860           for (i=0; i<imark; i++) {
1861             idx_p[i] = cmap[cworkB[i]];
1862           }
1863         } else {
1864           for (i=0; i<nzB; i++) {
1865             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1866             else break;
1867           }
1868           imark = i;
1869         }
1870         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1871         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1872       }
1873     } else {
1874       if (idx) *idx = 0;
1875       if (v)   *v   = 0;
1876     }
1877   }
1878   *nz  = nztot;
1879   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1880   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1887 {
1888   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1889 
1890   PetscFunctionBegin;
1891   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1892   aij->getrowactive = PETSC_FALSE;
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatNorm_MPIAIJ"
1898 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1899 {
1900   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1901   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1902   PetscErrorCode ierr;
1903   PetscInt       i,j,cstart = mat->cmap->rstart;
1904   PetscReal      sum = 0.0;
1905   MatScalar      *v;
1906 
1907   PetscFunctionBegin;
1908   if (aij->size == 1) {
1909     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1910   } else {
1911     if (type == NORM_FROBENIUS) {
1912       v = amat->a;
1913       for (i=0; i<amat->nz; i++) {
1914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1915       }
1916       v = bmat->a;
1917       for (i=0; i<bmat->nz; i++) {
1918         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1919       }
1920       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       *norm = PetscSqrtReal(*norm);
1922     } else if (type == NORM_1) { /* max column norm */
1923       PetscReal *tmp,*tmp2;
1924       PetscInt  *jj,*garray = aij->garray;
1925       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1926       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1927       *norm = 0.0;
1928       v     = amat->a; jj = amat->j;
1929       for (j=0; j<amat->nz; j++) {
1930         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1931       }
1932       v = bmat->a; jj = bmat->j;
1933       for (j=0; j<bmat->nz; j++) {
1934         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1935       }
1936       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       for (j=0; j<mat->cmap->N; j++) {
1938         if (tmp2[j] > *norm) *norm = tmp2[j];
1939       }
1940       ierr = PetscFree(tmp);CHKERRQ(ierr);
1941       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatTranspose_MPIAIJ"
1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1965 {
1966   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1967   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1968   PetscErrorCode ierr;
1969   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1970   PetscInt       cstart = A->cmap->rstart,ncol;
1971   Mat            B;
1972   MatScalar      *array;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1976 
1977   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1978   ai = Aloc->i; aj = Aloc->j;
1979   bi = Bloc->i; bj = Bloc->j;
1980   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1981     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1982     PetscSFNode          *oloc;
1983     PETSC_UNUSED PetscSF sf;
1984 
1985     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1986     /* compute d_nnz for preallocation */
1987     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<ai[ma]; i++) {
1989       d_nnz[aj[i]]++;
1990       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1991     }
1992     /* compute local off-diagonal contributions */
1993     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1995     /* map those to global */
1996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1997     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1998     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1999     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2000     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2003 
2004     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2005     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2006     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2007     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2008     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2009     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2010   } else {
2011     B    = *matout;
2012     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2014   }
2015 
2016   /* copy over the A part */
2017   array = Aloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<ma; i++) {
2020     ncol = ai[i+1]-ai[i];
2021     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; aj += ncol;
2024   }
2025   aj = Aloc->j;
2026   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2027 
2028   /* copy over the B part */
2029   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2030   array = Bloc->a;
2031   row   = A->rmap->rstart;
2032   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2033   cols_tmp = cols;
2034   for (i=0; i<mb; i++) {
2035     ncol = bi[i+1]-bi[i];
2036     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2037     row++;
2038     array += ncol; cols_tmp += ncol;
2039   }
2040   ierr = PetscFree(cols);CHKERRQ(ierr);
2041 
2042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2045     *matout = B;
2046   } else {
2047     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2055 {
2056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2057   Mat            a    = aij->A,b = aij->B;
2058   PetscErrorCode ierr;
2059   PetscInt       s1,s2,s3;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2063   if (rr) {
2064     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2065     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2066     /* Overlap communication with computation. */
2067     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068   }
2069   if (ll) {
2070     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2071     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2072     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2073   }
2074   /* scale  the diagonal block */
2075   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2076 
2077   if (rr) {
2078     /* Do a scatter end and then right scale the off-diagonal block */
2079     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2088 {
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatEqual_MPIAIJ"
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCopy_MPIAIJ"
2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2125 
2126   PetscFunctionBegin;
2127   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2128   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2129     /* because of the column compression in the off-processor part of the matrix a->B,
2130        the number of columns in a->B and b->B may be different, hence we cannot call
2131        the MatCopy() directly on the two parts. If need be, we can provide a more
2132        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2133        then copying the submatrices */
2134     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2135   } else {
2136     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2137     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUp_MPIAIJ"
2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2145 {
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*
2154    Computes the number of nonzeros per row needed for preallocation when X and Y
2155    have different nonzero structure.
2156 */
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2160 {
2161   PetscInt       i,j,k,nzx,nzy;
2162 
2163   PetscFunctionBegin;
2164   /* Set the number of nonzeros in the new matrix */
2165   for (i=0; i<m; i++) {
2166     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2167     nzx = xi[i+1] - xi[i];
2168     nzy = yi[i+1] - yi[i];
2169     nnz[i] = 0;
2170     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2171       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2172       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2173       nnz[i]++;
2174     }
2175     for (; k<nzy; k++) nnz[i]++;
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscErrorCode ierr;
2186   PetscInt       m = Y->rmap->N;
2187   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2188   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2201   PetscBLASInt   bnz,one=1;
2202   Mat_SeqAIJ     *x,*y;
2203 
2204   PetscFunctionBegin;
2205   if (str == SAME_NONZERO_PATTERN) {
2206     PetscScalar alpha = a;
2207     x    = (Mat_SeqAIJ*)xx->A->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     y    = (Mat_SeqAIJ*)yy->A->data;
2210     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2211     x    = (Mat_SeqAIJ*)xx->B->data;
2212     y    = (Mat_SeqAIJ*)yy->B->data;
2213     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2214     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2215     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2216   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2217     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2218   } else {
2219     Mat      B;
2220     PetscInt *nnz_d,*nnz_o;
2221     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2222     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2223     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2224     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2225     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2226     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2227     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2230     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2231     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2232     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatConjugate_MPIAIJ"
2243 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2244 {
2245 #if defined(PETSC_USE_COMPLEX)
2246   PetscErrorCode ierr;
2247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2251   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2252 #else
2253   PetscFunctionBegin;
2254 #endif
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatRealPart_MPIAIJ"
2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2267   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2280   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #if defined(PETSC_HAVE_PBGL)
2285 
2286 #include <boost/parallel/mpi/bsp_process_group.hpp>
2287 #include <boost/graph/distributed/ilu_default_graph.hpp>
2288 #include <boost/graph/distributed/ilu_0_block.hpp>
2289 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2290 #include <boost/graph/distributed/petsc/interface.hpp>
2291 #include <boost/multi_array.hpp>
2292 #include <boost/parallel/distributed_property_map->hpp>
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2296 /*
2297   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2298 */
2299 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2300 {
2301   namespace petsc = boost::distributed::petsc;
2302 
2303   namespace graph_dist = boost::graph::distributed;
2304   using boost::graph::distributed::ilu_default::process_group_type;
2305   using boost::graph::ilu_permuted;
2306 
2307   PetscBool      row_identity, col_identity;
2308   PetscContainer c;
2309   PetscInt       m, n, M, N;
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2314   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2315   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2316   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2317 
2318   process_group_type pg;
2319   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2320   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2321   lgraph_type& level_graph = *lgraph_p;
2322   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2323 
2324   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2325   ilu_permuted(level_graph);
2326 
2327   /* put together the new matrix */
2328   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2329   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2330   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2331   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2332   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2333   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2334   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2336 
2337   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2338   ierr = PetscContainerSetPointer(c, lgraph_p);
2339   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2340   ierr = PetscContainerDestroy(&c);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2346 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2347 {
2348   PetscFunctionBegin;
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatSolve_MPIAIJ"
2354 /*
2355   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2356 */
2357 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2358 {
2359   namespace graph_dist = boost::graph::distributed;
2360 
2361   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2362   lgraph_type    *lgraph_p;
2363   PetscContainer c;
2364   PetscErrorCode ierr;
2365 
2366   PetscFunctionBegin;
2367   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2368   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2369   ierr = VecCopy(b, x);CHKERRQ(ierr);
2370 
2371   PetscScalar *array_x;
2372   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2373   PetscInt sx;
2374   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2375 
2376   PetscScalar *array_b;
2377   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2378   PetscInt sb;
2379   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2380 
2381   lgraph_type& level_graph = *lgraph_p;
2382   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2383 
2384   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2385   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2386   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2387 
2388   typedef boost::iterator_property_map<array_ref_type::iterator,
2389                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2390   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2391   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2392 
2393   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2394   PetscFunctionReturn(0);
2395 }
2396 #endif
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2400 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2401 {
2402   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2403   PetscErrorCode ierr;
2404   PetscInt       i,*idxb = 0;
2405   PetscScalar    *va,*vb;
2406   Vec            vtmp;
2407 
2408   PetscFunctionBegin;
2409   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2410   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2411   if (idx) {
2412     for (i=0; i<A->rmap->n; i++) {
2413       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2414     }
2415   }
2416 
2417   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2418   if (idx) {
2419     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2420   }
2421   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2422   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2423 
2424   for (i=0; i<A->rmap->n; i++) {
2425     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2426       va[i] = vb[i];
2427       if (idx) idx[i] = a->garray[idxb[i]];
2428     }
2429   }
2430 
2431   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2432   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2433   ierr = PetscFree(idxb);CHKERRQ(ierr);
2434   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 #undef __FUNCT__
2439 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2440 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2441 {
2442   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2443   PetscErrorCode ierr;
2444   PetscInt       i,*idxb = 0;
2445   PetscScalar    *va,*vb;
2446   Vec            vtmp;
2447 
2448   PetscFunctionBegin;
2449   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2451   if (idx) {
2452     for (i=0; i<A->cmap->n; i++) {
2453       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2454     }
2455   }
2456 
2457   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2458   if (idx) {
2459     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2460   }
2461   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2462   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2463 
2464   for (i=0; i<A->rmap->n; i++) {
2465     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2466       va[i] = vb[i];
2467       if (idx) idx[i] = a->garray[idxb[i]];
2468     }
2469   }
2470 
2471   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2472   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2473   ierr = PetscFree(idxb);CHKERRQ(ierr);
2474   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2480 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2481 {
2482   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2483   PetscInt       n      = A->rmap->n;
2484   PetscInt       cstart = A->cmap->rstart;
2485   PetscInt       *cmap  = mat->garray;
2486   PetscInt       *diagIdx, *offdiagIdx;
2487   Vec            diagV, offdiagV;
2488   PetscScalar    *a, *diagA, *offdiagA;
2489   PetscInt       r;
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2494   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2495   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2496   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2497   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2498   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2499   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2500   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2501   for (r = 0; r < n; ++r) {
2502     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2503       a[r]   = diagA[r];
2504       idx[r] = cstart + diagIdx[r];
2505     } else {
2506       a[r]   = offdiagA[r];
2507       idx[r] = cmap[offdiagIdx[r]];
2508     }
2509   }
2510   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2511   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2512   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2513   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2514   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2515   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2521 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2522 {
2523   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2524   PetscInt       n      = A->rmap->n;
2525   PetscInt       cstart = A->cmap->rstart;
2526   PetscInt       *cmap  = mat->garray;
2527   PetscInt       *diagIdx, *offdiagIdx;
2528   Vec            diagV, offdiagV;
2529   PetscScalar    *a, *diagA, *offdiagA;
2530   PetscInt       r;
2531   PetscErrorCode ierr;
2532 
2533   PetscFunctionBegin;
2534   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2535   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2536   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2537   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2538   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2539   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2540   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2541   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2542   for (r = 0; r < n; ++r) {
2543     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2544       a[r]   = diagA[r];
2545       idx[r] = cstart + diagIdx[r];
2546     } else {
2547       a[r]   = offdiagA[r];
2548       idx[r] = cmap[offdiagIdx[r]];
2549     }
2550   }
2551   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2552   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2553   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2554   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2555   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2556   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2562 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2563 {
2564   PetscErrorCode ierr;
2565   Mat            *dummy;
2566 
2567   PetscFunctionBegin;
2568   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2569   *newmat = *dummy;
2570   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2576 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2577 {
2578   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2579   PetscErrorCode ierr;
2580 
2581   PetscFunctionBegin;
2582   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2588 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2589 {
2590   PetscErrorCode ierr;
2591   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2592 
2593   PetscFunctionBegin;
2594   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2595   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2596   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2597   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatShift_MPIAIJ"
2603 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2604 {
2605   PetscErrorCode ierr;
2606   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2607   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data,*bij = (Mat_SeqAIJ*)maij->B->data;
2608 
2609   PetscFunctionBegin;
2610   if (!aij->nz && !bij->nz) {
2611     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2612   }
2613   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 /* -------------------------------------------------------------------*/
2618 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2619                                        MatGetRow_MPIAIJ,
2620                                        MatRestoreRow_MPIAIJ,
2621                                        MatMult_MPIAIJ,
2622                                 /* 4*/ MatMultAdd_MPIAIJ,
2623                                        MatMultTranspose_MPIAIJ,
2624                                        MatMultTransposeAdd_MPIAIJ,
2625 #if defined(PETSC_HAVE_PBGL)
2626                                        MatSolve_MPIAIJ,
2627 #else
2628                                        0,
2629 #endif
2630                                        0,
2631                                        0,
2632                                 /*10*/ 0,
2633                                        0,
2634                                        0,
2635                                        MatSOR_MPIAIJ,
2636                                        MatTranspose_MPIAIJ,
2637                                 /*15*/ MatGetInfo_MPIAIJ,
2638                                        MatEqual_MPIAIJ,
2639                                        MatGetDiagonal_MPIAIJ,
2640                                        MatDiagonalScale_MPIAIJ,
2641                                        MatNorm_MPIAIJ,
2642                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2643                                        MatAssemblyEnd_MPIAIJ,
2644                                        MatSetOption_MPIAIJ,
2645                                        MatZeroEntries_MPIAIJ,
2646                                 /*24*/ MatZeroRows_MPIAIJ,
2647                                        0,
2648 #if defined(PETSC_HAVE_PBGL)
2649                                        0,
2650 #else
2651                                        0,
2652 #endif
2653                                        0,
2654                                        0,
2655                                 /*29*/ MatSetUp_MPIAIJ,
2656 #if defined(PETSC_HAVE_PBGL)
2657                                        0,
2658 #else
2659                                        0,
2660 #endif
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*34*/ MatDuplicate_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                 /*39*/ MatAXPY_MPIAIJ,
2670                                        MatGetSubMatrices_MPIAIJ,
2671                                        MatIncreaseOverlap_MPIAIJ,
2672                                        MatGetValues_MPIAIJ,
2673                                        MatCopy_MPIAIJ,
2674                                 /*44*/ MatGetRowMax_MPIAIJ,
2675                                        MatScale_MPIAIJ,
2676                                        MatShift_MPIAIJ,
2677                                        MatDiagonalSet_MPIAIJ,
2678                                        MatZeroRowsColumns_MPIAIJ,
2679                                 /*49*/ MatSetRandom_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2685                                        0,
2686                                        MatSetUnfactored_MPIAIJ,
2687                                        MatPermute_MPIAIJ,
2688                                        0,
2689                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2690                                        MatDestroy_MPIAIJ,
2691                                        MatView_MPIAIJ,
2692                                        0,
2693                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2694                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2695                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2700                                        MatGetRowMinAbs_MPIAIJ,
2701                                        0,
2702                                        MatSetColoring_MPIAIJ,
2703                                        0,
2704                                        MatSetValuesAdifor_MPIAIJ,
2705                                 /*75*/ MatFDColoringApply_AIJ,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        MatFindZeroDiagonals_MPIAIJ,
2710                                 /*80*/ 0,
2711                                        0,
2712                                        0,
2713                                 /*83*/ MatLoad_MPIAIJ,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2720                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2721                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2722                                        MatPtAP_MPIAIJ_MPIAIJ,
2723                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2724                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                 /*99*/ 0,
2730                                        0,
2731                                        0,
2732                                        MatConjugate_MPIAIJ,
2733                                        0,
2734                                 /*104*/MatSetValuesRow_MPIAIJ,
2735                                        MatRealPart_MPIAIJ,
2736                                        MatImaginaryPart_MPIAIJ,
2737                                        0,
2738                                        0,
2739                                 /*109*/0,
2740                                        0,
2741                                        MatGetRowMin_MPIAIJ,
2742                                        0,
2743                                        0,
2744                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2745                                        0,
2746                                        MatGetGhosts_MPIAIJ,
2747                                        0,
2748                                        0,
2749                                 /*119*/0,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                        MatGetMultiProcBlock_MPIAIJ,
2754                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2755                                        MatGetColumnNorms_MPIAIJ,
2756                                        MatInvertBlockDiagonal_MPIAIJ,
2757                                        0,
2758                                        MatGetSubMatricesMPI_MPIAIJ,
2759                                 /*129*/0,
2760                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2761                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2762                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2763                                        0,
2764                                 /*134*/0,
2765                                        0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                 /*139*/0,
2770                                        0,
2771                                        0,
2772                                        MatFDColoringSetUp_MPIXAIJ,
2773                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2774                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2775 };
2776 
2777 /* ----------------------------------------------------------------------------------------*/
2778 
2779 #undef __FUNCT__
2780 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2781 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2782 {
2783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2784   PetscErrorCode ierr;
2785 
2786   PetscFunctionBegin;
2787   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2788   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2789   PetscFunctionReturn(0);
2790 }
2791 
2792 #undef __FUNCT__
2793 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2794 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2795 {
2796   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2797   PetscErrorCode ierr;
2798 
2799   PetscFunctionBegin;
2800   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2801   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2807 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2808 {
2809   Mat_MPIAIJ     *b;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2814   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2815   b = (Mat_MPIAIJ*)B->data;
2816 
2817   if (!B->preallocated) {
2818     /* Explicitly create 2 MATSEQAIJ matrices. */
2819     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2820     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2821     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2822     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2823     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2824     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2825     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2826     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2827     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2828     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2829   }
2830 
2831   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2832   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2833   B->preallocated = PETSC_TRUE;
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2839 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2840 {
2841   Mat            mat;
2842   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2843   PetscErrorCode ierr;
2844 
2845   PetscFunctionBegin;
2846   *newmat = 0;
2847   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2848   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2849   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2850   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2851   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2852   a       = (Mat_MPIAIJ*)mat->data;
2853 
2854   mat->factortype   = matin->factortype;
2855   mat->assembled    = PETSC_TRUE;
2856   mat->insertmode   = NOT_SET_VALUES;
2857   mat->preallocated = PETSC_TRUE;
2858 
2859   a->size         = oldmat->size;
2860   a->rank         = oldmat->rank;
2861   a->donotstash   = oldmat->donotstash;
2862   a->roworiented  = oldmat->roworiented;
2863   a->rowindices   = 0;
2864   a->rowvalues    = 0;
2865   a->getrowactive = PETSC_FALSE;
2866 
2867   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2868   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2869 
2870   if (oldmat->colmap) {
2871 #if defined(PETSC_USE_CTABLE)
2872     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2873 #else
2874     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2875     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2876     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2877 #endif
2878   } else a->colmap = 0;
2879   if (oldmat->garray) {
2880     PetscInt len;
2881     len  = oldmat->B->cmap->n;
2882     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2883     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2884     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2885   } else a->garray = 0;
2886 
2887   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2888   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2889   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2890   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2891   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2892   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2893   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2894   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2895   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2896   *newmat = mat;
2897   PetscFunctionReturn(0);
2898 }
2899 
2900 
2901 
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatLoad_MPIAIJ"
2904 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2905 {
2906   PetscScalar    *vals,*svals;
2907   MPI_Comm       comm;
2908   PetscErrorCode ierr;
2909   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2910   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2911   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2912   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2913   PetscInt       cend,cstart,n,*rowners;
2914   int            fd;
2915   PetscInt       bs = newMat->rmap->bs;
2916 
2917   PetscFunctionBegin;
2918   /* force binary viewer to load .info file if it has not yet done so */
2919   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2920   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2922   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2923   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2924   if (!rank) {
2925     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2926     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2927   }
2928 
2929   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2930   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2931   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2932   if (bs < 0) bs = 1;
2933 
2934   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2935   M    = header[1]; N = header[2];
2936 
2937   /* If global sizes are set, check if they are consistent with that given in the file */
2938   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2939   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2940 
2941   /* determine ownership of all (block) rows */
2942   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2943   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2944   else m = newMat->rmap->n; /* Set by user */
2945 
2946   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2947   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2948 
2949   /* First process needs enough room for process with most rows */
2950   if (!rank) {
2951     mmax = rowners[1];
2952     for (i=2; i<=size; i++) {
2953       mmax = PetscMax(mmax, rowners[i]);
2954     }
2955   } else mmax = -1;             /* unused, but compilers complain */
2956 
2957   rowners[0] = 0;
2958   for (i=2; i<=size; i++) {
2959     rowners[i] += rowners[i-1];
2960   }
2961   rstart = rowners[rank];
2962   rend   = rowners[rank+1];
2963 
2964   /* distribute row lengths to all processors */
2965   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2966   if (!rank) {
2967     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2968     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2969     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2970     for (j=0; j<m; j++) {
2971       procsnz[0] += ourlens[j];
2972     }
2973     for (i=1; i<size; i++) {
2974       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2975       /* calculate the number of nonzeros on each processor */
2976       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2977         procsnz[i] += rowlengths[j];
2978       }
2979       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2980     }
2981     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2982   } else {
2983     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2984   }
2985 
2986   if (!rank) {
2987     /* determine max buffer needed and allocate it */
2988     maxnz = 0;
2989     for (i=0; i<size; i++) {
2990       maxnz = PetscMax(maxnz,procsnz[i]);
2991     }
2992     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2993 
2994     /* read in my part of the matrix column indices  */
2995     nz   = procsnz[0];
2996     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2997     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2998 
2999     /* read in every one elses and ship off */
3000     for (i=1; i<size; i++) {
3001       nz   = procsnz[i];
3002       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3003       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3004     }
3005     ierr = PetscFree(cols);CHKERRQ(ierr);
3006   } else {
3007     /* determine buffer space needed for message */
3008     nz = 0;
3009     for (i=0; i<m; i++) {
3010       nz += ourlens[i];
3011     }
3012     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3013 
3014     /* receive message of column indices*/
3015     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3016   }
3017 
3018   /* determine column ownership if matrix is not square */
3019   if (N != M) {
3020     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3021     else n = newMat->cmap->n;
3022     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3023     cstart = cend - n;
3024   } else {
3025     cstart = rstart;
3026     cend   = rend;
3027     n      = cend - cstart;
3028   }
3029 
3030   /* loop over local rows, determining number of off diagonal entries */
3031   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3032   jj   = 0;
3033   for (i=0; i<m; i++) {
3034     for (j=0; j<ourlens[i]; j++) {
3035       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3036       jj++;
3037     }
3038   }
3039 
3040   for (i=0; i<m; i++) {
3041     ourlens[i] -= offlens[i];
3042   }
3043   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3044 
3045   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3046 
3047   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3048 
3049   for (i=0; i<m; i++) {
3050     ourlens[i] += offlens[i];
3051   }
3052 
3053   if (!rank) {
3054     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3055 
3056     /* read in my part of the matrix numerical values  */
3057     nz   = procsnz[0];
3058     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3059 
3060     /* insert into matrix */
3061     jj      = rstart;
3062     smycols = mycols;
3063     svals   = vals;
3064     for (i=0; i<m; i++) {
3065       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3066       smycols += ourlens[i];
3067       svals   += ourlens[i];
3068       jj++;
3069     }
3070 
3071     /* read in other processors and ship out */
3072     for (i=1; i<size; i++) {
3073       nz   = procsnz[i];
3074       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3075       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3076     }
3077     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3078   } else {
3079     /* receive numeric values */
3080     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3081 
3082     /* receive message of values*/
3083     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3084 
3085     /* insert into matrix */
3086     jj      = rstart;
3087     smycols = mycols;
3088     svals   = vals;
3089     for (i=0; i<m; i++) {
3090       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3091       smycols += ourlens[i];
3092       svals   += ourlens[i];
3093       jj++;
3094     }
3095   }
3096   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3097   ierr = PetscFree(vals);CHKERRQ(ierr);
3098   ierr = PetscFree(mycols);CHKERRQ(ierr);
3099   ierr = PetscFree(rowners);CHKERRQ(ierr);
3100   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3101   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 #undef __FUNCT__
3106 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3107 /* TODO: Not scalable because of ISAllGather(). */
3108 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3109 {
3110   PetscErrorCode ierr;
3111   IS             iscol_local;
3112   PetscInt       csize;
3113 
3114   PetscFunctionBegin;
3115   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3116   if (call == MAT_REUSE_MATRIX) {
3117     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3118     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3119   } else {
3120     PetscInt cbs;
3121     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3122     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3123     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3124   }
3125   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3126   if (call == MAT_INITIAL_MATRIX) {
3127     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3128     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3129   }
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3136 /*
3137     Not great since it makes two copies of the submatrix, first an SeqAIJ
3138   in local and then by concatenating the local matrices the end result.
3139   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3140 
3141   Note: This requires a sequential iscol with all indices.
3142 */
3143 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3144 {
3145   PetscErrorCode ierr;
3146   PetscMPIInt    rank,size;
3147   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3148   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3149   PetscBool      allcolumns, colflag;
3150   Mat            M,Mreuse;
3151   MatScalar      *vwork,*aa;
3152   MPI_Comm       comm;
3153   Mat_SeqAIJ     *aij;
3154 
3155   PetscFunctionBegin;
3156   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3159 
3160   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3161   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3162   if (colflag && ncol == mat->cmap->N) {
3163     allcolumns = PETSC_TRUE;
3164   } else {
3165     allcolumns = PETSC_FALSE;
3166   }
3167   if (call ==  MAT_REUSE_MATRIX) {
3168     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3169     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3170     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3171   } else {
3172     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3173   }
3174 
3175   /*
3176       m - number of local rows
3177       n - number of columns (same on all processors)
3178       rstart - first row in new global matrix generated
3179   */
3180   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3181   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3182   if (call == MAT_INITIAL_MATRIX) {
3183     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3184     ii  = aij->i;
3185     jj  = aij->j;
3186 
3187     /*
3188         Determine the number of non-zeros in the diagonal and off-diagonal
3189         portions of the matrix in order to do correct preallocation
3190     */
3191 
3192     /* first get start and end of "diagonal" columns */
3193     if (csize == PETSC_DECIDE) {
3194       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3195       if (mglobal == n) { /* square matrix */
3196         nlocal = m;
3197       } else {
3198         nlocal = n/size + ((n % size) > rank);
3199       }
3200     } else {
3201       nlocal = csize;
3202     }
3203     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3204     rstart = rend - nlocal;
3205     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3206 
3207     /* next, compute all the lengths */
3208     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3209     olens = dlens + m;
3210     for (i=0; i<m; i++) {
3211       jend = ii[i+1] - ii[i];
3212       olen = 0;
3213       dlen = 0;
3214       for (j=0; j<jend; j++) {
3215         if (*jj < rstart || *jj >= rend) olen++;
3216         else dlen++;
3217         jj++;
3218       }
3219       olens[i] = olen;
3220       dlens[i] = dlen;
3221     }
3222     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3223     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3224     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3225     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3226     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3227     ierr = PetscFree(dlens);CHKERRQ(ierr);
3228   } else {
3229     PetscInt ml,nl;
3230 
3231     M    = *newmat;
3232     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3233     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3234     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3235     /*
3236          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3237        rather than the slower MatSetValues().
3238     */
3239     M->was_assembled = PETSC_TRUE;
3240     M->assembled     = PETSC_FALSE;
3241   }
3242   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3243   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3244   ii   = aij->i;
3245   jj   = aij->j;
3246   aa   = aij->a;
3247   for (i=0; i<m; i++) {
3248     row   = rstart + i;
3249     nz    = ii[i+1] - ii[i];
3250     cwork = jj;     jj += nz;
3251     vwork = aa;     aa += nz;
3252     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3253   }
3254 
3255   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3257   *newmat = M;
3258 
3259   /* save submatrix used in processor for next request */
3260   if (call ==  MAT_INITIAL_MATRIX) {
3261     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3262     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3263   }
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3269 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3270 {
3271   PetscInt       m,cstart, cend,j,nnz,i,d;
3272   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3273   const PetscInt *JJ;
3274   PetscScalar    *values;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3279 
3280   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3281   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3282   m      = B->rmap->n;
3283   cstart = B->cmap->rstart;
3284   cend   = B->cmap->rend;
3285   rstart = B->rmap->rstart;
3286 
3287   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3288 
3289 #if defined(PETSC_USE_DEBUGGING)
3290   for (i=0; i<m; i++) {
3291     nnz = Ii[i+1]- Ii[i];
3292     JJ  = J + Ii[i];
3293     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3294     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3295     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3296   }
3297 #endif
3298 
3299   for (i=0; i<m; i++) {
3300     nnz     = Ii[i+1]- Ii[i];
3301     JJ      = J + Ii[i];
3302     nnz_max = PetscMax(nnz_max,nnz);
3303     d       = 0;
3304     for (j=0; j<nnz; j++) {
3305       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3306     }
3307     d_nnz[i] = d;
3308     o_nnz[i] = nnz - d;
3309   }
3310   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3311   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3312 
3313   if (v) values = (PetscScalar*)v;
3314   else {
3315     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3316   }
3317 
3318   for (i=0; i<m; i++) {
3319     ii   = i + rstart;
3320     nnz  = Ii[i+1]- Ii[i];
3321     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3322   }
3323   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3324   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325 
3326   if (!v) {
3327     ierr = PetscFree(values);CHKERRQ(ierr);
3328   }
3329   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3330   PetscFunctionReturn(0);
3331 }
3332 
3333 #undef __FUNCT__
3334 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3335 /*@
3336    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3337    (the default parallel PETSc format).
3338 
3339    Collective on MPI_Comm
3340 
3341    Input Parameters:
3342 +  B - the matrix
3343 .  i - the indices into j for the start of each local row (starts with zero)
3344 .  j - the column indices for each local row (starts with zero)
3345 -  v - optional values in the matrix
3346 
3347    Level: developer
3348 
3349    Notes:
3350        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3351      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3352      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3353 
3354        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3355 
3356        The format which is used for the sparse matrix input, is equivalent to a
3357     row-major ordering.. i.e for the following matrix, the input data expected is
3358     as shown
3359 
3360 $        1 0 0
3361 $        2 0 3     P0
3362 $       -------
3363 $        4 5 6     P1
3364 $
3365 $     Process0 [P0]: rows_owned=[0,1]
3366 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3367 $        j =  {0,0,2}  [size = 3]
3368 $        v =  {1,2,3}  [size = 3]
3369 $
3370 $     Process1 [P1]: rows_owned=[2]
3371 $        i =  {0,3}    [size = nrow+1  = 1+1]
3372 $        j =  {0,1,2}  [size = 3]
3373 $        v =  {4,5,6}  [size = 3]
3374 
3375 .keywords: matrix, aij, compressed row, sparse, parallel
3376 
3377 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3378           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3379 @*/
3380 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3381 {
3382   PetscErrorCode ierr;
3383 
3384   PetscFunctionBegin;
3385   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3386   PetscFunctionReturn(0);
3387 }
3388 
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3391 /*@C
3392    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3393    (the default parallel PETSc format).  For good matrix assembly performance
3394    the user should preallocate the matrix storage by setting the parameters
3395    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3396    performance can be increased by more than a factor of 50.
3397 
3398    Collective on MPI_Comm
3399 
3400    Input Parameters:
3401 +  B - the matrix
3402 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3403            (same value is used for all local rows)
3404 .  d_nnz - array containing the number of nonzeros in the various rows of the
3405            DIAGONAL portion of the local submatrix (possibly different for each row)
3406            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3407            The size of this array is equal to the number of local rows, i.e 'm'.
3408            For matrices that will be factored, you must leave room for (and set)
3409            the diagonal entry even if it is zero.
3410 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3411            submatrix (same value is used for all local rows).
3412 -  o_nnz - array containing the number of nonzeros in the various rows of the
3413            OFF-DIAGONAL portion of the local submatrix (possibly different for
3414            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3415            structure. The size of this array is equal to the number
3416            of local rows, i.e 'm'.
3417 
3418    If the *_nnz parameter is given then the *_nz parameter is ignored
3419 
3420    The AIJ format (also called the Yale sparse matrix format or
3421    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3422    storage.  The stored row and column indices begin with zero.
3423    See Users-Manual: ch_mat for details.
3424 
3425    The parallel matrix is partitioned such that the first m0 rows belong to
3426    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3427    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3428 
3429    The DIAGONAL portion of the local submatrix of a processor can be defined
3430    as the submatrix which is obtained by extraction the part corresponding to
3431    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3432    first row that belongs to the processor, r2 is the last row belonging to
3433    the this processor, and c1-c2 is range of indices of the local part of a
3434    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3435    common case of a square matrix, the row and column ranges are the same and
3436    the DIAGONAL part is also square. The remaining portion of the local
3437    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3438 
3439    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3440 
3441    You can call MatGetInfo() to get information on how effective the preallocation was;
3442    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3443    You can also run with the option -info and look for messages with the string
3444    malloc in them to see if additional memory allocation was needed.
3445 
3446    Example usage:
3447 
3448    Consider the following 8x8 matrix with 34 non-zero values, that is
3449    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3450    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3451    as follows:
3452 
3453 .vb
3454             1  2  0  |  0  3  0  |  0  4
3455     Proc0   0  5  6  |  7  0  0  |  8  0
3456             9  0 10  | 11  0  0  | 12  0
3457     -------------------------------------
3458            13  0 14  | 15 16 17  |  0  0
3459     Proc1   0 18  0  | 19 20 21  |  0  0
3460             0  0  0  | 22 23  0  | 24  0
3461     -------------------------------------
3462     Proc2  25 26 27  |  0  0 28  | 29  0
3463            30  0  0  | 31 32 33  |  0 34
3464 .ve
3465 
3466    This can be represented as a collection of submatrices as:
3467 
3468 .vb
3469       A B C
3470       D E F
3471       G H I
3472 .ve
3473 
3474    Where the submatrices A,B,C are owned by proc0, D,E,F are
3475    owned by proc1, G,H,I are owned by proc2.
3476 
3477    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3478    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3479    The 'M','N' parameters are 8,8, and have the same values on all procs.
3480 
3481    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3482    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3483    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3484    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3485    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3486    matrix, ans [DF] as another SeqAIJ matrix.
3487 
3488    When d_nz, o_nz parameters are specified, d_nz storage elements are
3489    allocated for every row of the local diagonal submatrix, and o_nz
3490    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3491    One way to choose d_nz and o_nz is to use the max nonzerors per local
3492    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3493    In this case, the values of d_nz,o_nz are:
3494 .vb
3495      proc0 : dnz = 2, o_nz = 2
3496      proc1 : dnz = 3, o_nz = 2
3497      proc2 : dnz = 1, o_nz = 4
3498 .ve
3499    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3500    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3501    for proc3. i.e we are using 12+15+10=37 storage locations to store
3502    34 values.
3503 
3504    When d_nnz, o_nnz parameters are specified, the storage is specified
3505    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3506    In the above case the values for d_nnz,o_nnz are:
3507 .vb
3508      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3509      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3510      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3511 .ve
3512    Here the space allocated is sum of all the above values i.e 34, and
3513    hence pre-allocation is perfect.
3514 
3515    Level: intermediate
3516 
3517 .keywords: matrix, aij, compressed row, sparse, parallel
3518 
3519 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3520           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3521 @*/
3522 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3523 {
3524   PetscErrorCode ierr;
3525 
3526   PetscFunctionBegin;
3527   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3528   PetscValidType(B,1);
3529   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3535 /*@
3536      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3537          CSR format the local rows.
3538 
3539    Collective on MPI_Comm
3540 
3541    Input Parameters:
3542 +  comm - MPI communicator
3543 .  m - number of local rows (Cannot be PETSC_DECIDE)
3544 .  n - This value should be the same as the local size used in creating the
3545        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3546        calculated if N is given) For square matrices n is almost always m.
3547 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3548 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3549 .   i - row indices
3550 .   j - column indices
3551 -   a - matrix values
3552 
3553    Output Parameter:
3554 .   mat - the matrix
3555 
3556    Level: intermediate
3557 
3558    Notes:
3559        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3560      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3561      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3562 
3563        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3564 
3565        The format which is used for the sparse matrix input, is equivalent to a
3566     row-major ordering.. i.e for the following matrix, the input data expected is
3567     as shown
3568 
3569 $        1 0 0
3570 $        2 0 3     P0
3571 $       -------
3572 $        4 5 6     P1
3573 $
3574 $     Process0 [P0]: rows_owned=[0,1]
3575 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3576 $        j =  {0,0,2}  [size = 3]
3577 $        v =  {1,2,3}  [size = 3]
3578 $
3579 $     Process1 [P1]: rows_owned=[2]
3580 $        i =  {0,3}    [size = nrow+1  = 1+1]
3581 $        j =  {0,1,2}  [size = 3]
3582 $        v =  {4,5,6}  [size = 3]
3583 
3584 .keywords: matrix, aij, compressed row, sparse, parallel
3585 
3586 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3587           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3588 @*/
3589 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3590 {
3591   PetscErrorCode ierr;
3592 
3593   PetscFunctionBegin;
3594   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3595   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3596   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3597   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3598   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3599   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3600   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3601   PetscFunctionReturn(0);
3602 }
3603 
3604 #undef __FUNCT__
3605 #define __FUNCT__ "MatCreateAIJ"
3606 /*@C
3607    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3608    (the default parallel PETSc format).  For good matrix assembly performance
3609    the user should preallocate the matrix storage by setting the parameters
3610    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3611    performance can be increased by more than a factor of 50.
3612 
3613    Collective on MPI_Comm
3614 
3615    Input Parameters:
3616 +  comm - MPI communicator
3617 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3618            This value should be the same as the local size used in creating the
3619            y vector for the matrix-vector product y = Ax.
3620 .  n - This value should be the same as the local size used in creating the
3621        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3622        calculated if N is given) For square matrices n is almost always m.
3623 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3624 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3625 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3626            (same value is used for all local rows)
3627 .  d_nnz - array containing the number of nonzeros in the various rows of the
3628            DIAGONAL portion of the local submatrix (possibly different for each row)
3629            or NULL, if d_nz is used to specify the nonzero structure.
3630            The size of this array is equal to the number of local rows, i.e 'm'.
3631 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3632            submatrix (same value is used for all local rows).
3633 -  o_nnz - array containing the number of nonzeros in the various rows of the
3634            OFF-DIAGONAL portion of the local submatrix (possibly different for
3635            each row) or NULL, if o_nz is used to specify the nonzero
3636            structure. The size of this array is equal to the number
3637            of local rows, i.e 'm'.
3638 
3639    Output Parameter:
3640 .  A - the matrix
3641 
3642    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3643    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3644    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3645 
3646    Notes:
3647    If the *_nnz parameter is given then the *_nz parameter is ignored
3648 
3649    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3650    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3651    storage requirements for this matrix.
3652 
3653    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3654    processor than it must be used on all processors that share the object for
3655    that argument.
3656 
3657    The user MUST specify either the local or global matrix dimensions
3658    (possibly both).
3659 
3660    The parallel matrix is partitioned across processors such that the
3661    first m0 rows belong to process 0, the next m1 rows belong to
3662    process 1, the next m2 rows belong to process 2 etc.. where
3663    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3664    values corresponding to [m x N] submatrix.
3665 
3666    The columns are logically partitioned with the n0 columns belonging
3667    to 0th partition, the next n1 columns belonging to the next
3668    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3669 
3670    The DIAGONAL portion of the local submatrix on any given processor
3671    is the submatrix corresponding to the rows and columns m,n
3672    corresponding to the given processor. i.e diagonal matrix on
3673    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3674    etc. The remaining portion of the local submatrix [m x (N-n)]
3675    constitute the OFF-DIAGONAL portion. The example below better
3676    illustrates this concept.
3677 
3678    For a square global matrix we define each processor's diagonal portion
3679    to be its local rows and the corresponding columns (a square submatrix);
3680    each processor's off-diagonal portion encompasses the remainder of the
3681    local matrix (a rectangular submatrix).
3682 
3683    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3684 
3685    When calling this routine with a single process communicator, a matrix of
3686    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3687    type of communicator, use the construction mechanism:
3688      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3689 
3690    By default, this format uses inodes (identical nodes) when possible.
3691    We search for consecutive rows with the same nonzero structure, thereby
3692    reusing matrix information to achieve increased efficiency.
3693 
3694    Options Database Keys:
3695 +  -mat_no_inode  - Do not use inodes
3696 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3697 -  -mat_aij_oneindex - Internally use indexing starting at 1
3698         rather than 0.  Note that when calling MatSetValues(),
3699         the user still MUST index entries starting at 0!
3700 
3701 
3702    Example usage:
3703 
3704    Consider the following 8x8 matrix with 34 non-zero values, that is
3705    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3706    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3707    as follows:
3708 
3709 .vb
3710             1  2  0  |  0  3  0  |  0  4
3711     Proc0   0  5  6  |  7  0  0  |  8  0
3712             9  0 10  | 11  0  0  | 12  0
3713     -------------------------------------
3714            13  0 14  | 15 16 17  |  0  0
3715     Proc1   0 18  0  | 19 20 21  |  0  0
3716             0  0  0  | 22 23  0  | 24  0
3717     -------------------------------------
3718     Proc2  25 26 27  |  0  0 28  | 29  0
3719            30  0  0  | 31 32 33  |  0 34
3720 .ve
3721 
3722    This can be represented as a collection of submatrices as:
3723 
3724 .vb
3725       A B C
3726       D E F
3727       G H I
3728 .ve
3729 
3730    Where the submatrices A,B,C are owned by proc0, D,E,F are
3731    owned by proc1, G,H,I are owned by proc2.
3732 
3733    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3734    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3735    The 'M','N' parameters are 8,8, and have the same values on all procs.
3736 
3737    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3738    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3739    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3740    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3741    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3742    matrix, ans [DF] as another SeqAIJ matrix.
3743 
3744    When d_nz, o_nz parameters are specified, d_nz storage elements are
3745    allocated for every row of the local diagonal submatrix, and o_nz
3746    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3747    One way to choose d_nz and o_nz is to use the max nonzerors per local
3748    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3749    In this case, the values of d_nz,o_nz are:
3750 .vb
3751      proc0 : dnz = 2, o_nz = 2
3752      proc1 : dnz = 3, o_nz = 2
3753      proc2 : dnz = 1, o_nz = 4
3754 .ve
3755    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3756    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3757    for proc3. i.e we are using 12+15+10=37 storage locations to store
3758    34 values.
3759 
3760    When d_nnz, o_nnz parameters are specified, the storage is specified
3761    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3762    In the above case the values for d_nnz,o_nnz are:
3763 .vb
3764      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3765      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3766      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3767 .ve
3768    Here the space allocated is sum of all the above values i.e 34, and
3769    hence pre-allocation is perfect.
3770 
3771    Level: intermediate
3772 
3773 .keywords: matrix, aij, compressed row, sparse, parallel
3774 
3775 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3776           MPIAIJ, MatCreateMPIAIJWithArrays()
3777 @*/
3778 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3779 {
3780   PetscErrorCode ierr;
3781   PetscMPIInt    size;
3782 
3783   PetscFunctionBegin;
3784   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3785   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3786   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3787   if (size > 1) {
3788     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3789     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3790   } else {
3791     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3792     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3793   }
3794   PetscFunctionReturn(0);
3795 }
3796 
3797 #undef __FUNCT__
3798 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3799 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3800 {
3801   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3802 
3803   PetscFunctionBegin;
3804   if (Ad)     *Ad     = a->A;
3805   if (Ao)     *Ao     = a->B;
3806   if (colmap) *colmap = a->garray;
3807   PetscFunctionReturn(0);
3808 }
3809 
3810 #undef __FUNCT__
3811 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3812 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3813 {
3814   PetscErrorCode ierr;
3815   PetscInt       i;
3816   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3817 
3818   PetscFunctionBegin;
3819   if (coloring->ctype == IS_COLORING_GLOBAL) {
3820     ISColoringValue *allcolors,*colors;
3821     ISColoring      ocoloring;
3822 
3823     /* set coloring for diagonal portion */
3824     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3825 
3826     /* set coloring for off-diagonal portion */
3827     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3828     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3829     for (i=0; i<a->B->cmap->n; i++) {
3830       colors[i] = allcolors[a->garray[i]];
3831     }
3832     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3833     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3834     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3835     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3836   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3837     ISColoringValue *colors;
3838     PetscInt        *larray;
3839     ISColoring      ocoloring;
3840 
3841     /* set coloring for diagonal portion */
3842     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3843     for (i=0; i<a->A->cmap->n; i++) {
3844       larray[i] = i + A->cmap->rstart;
3845     }
3846     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3847     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3848     for (i=0; i<a->A->cmap->n; i++) {
3849       colors[i] = coloring->colors[larray[i]];
3850     }
3851     ierr = PetscFree(larray);CHKERRQ(ierr);
3852     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3853     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3854     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3855 
3856     /* set coloring for off-diagonal portion */
3857     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3858     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3859     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3860     for (i=0; i<a->B->cmap->n; i++) {
3861       colors[i] = coloring->colors[larray[i]];
3862     }
3863     ierr = PetscFree(larray);CHKERRQ(ierr);
3864     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3865     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3866     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3867   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3868   PetscFunctionReturn(0);
3869 }
3870 
3871 #undef __FUNCT__
3872 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3873 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3874 {
3875   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3876   PetscErrorCode ierr;
3877 
3878   PetscFunctionBegin;
3879   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3880   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883 
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3886 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3887 {
3888   PetscErrorCode ierr;
3889   PetscInt       m,N,i,rstart,nnz,Ii;
3890   PetscInt       *indx;
3891   PetscScalar    *values;
3892 
3893   PetscFunctionBegin;
3894   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3895   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3896     PetscInt       *dnz,*onz,sum,bs,cbs;
3897 
3898     if (n == PETSC_DECIDE) {
3899       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3900     }
3901     /* Check sum(n) = N */
3902     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3903     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3904 
3905     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3906     rstart -= m;
3907 
3908     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3909     for (i=0; i<m; i++) {
3910       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3911       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3912       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3913     }
3914 
3915     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3916     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3917     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3918     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3919     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3920     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3921     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3922   }
3923 
3924   /* numeric phase */
3925   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3926   for (i=0; i<m; i++) {
3927     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3928     Ii   = i + rstart;
3929     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3930     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3931   }
3932   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   PetscFunctionReturn(0);
3935 }
3936 
3937 #undef __FUNCT__
3938 #define __FUNCT__ "MatFileSplit"
3939 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3940 {
3941   PetscErrorCode    ierr;
3942   PetscMPIInt       rank;
3943   PetscInt          m,N,i,rstart,nnz;
3944   size_t            len;
3945   const PetscInt    *indx;
3946   PetscViewer       out;
3947   char              *name;
3948   Mat               B;
3949   const PetscScalar *values;
3950 
3951   PetscFunctionBegin;
3952   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3953   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3954   /* Should this be the type of the diagonal block of A? */
3955   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3956   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3957   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3958   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3959   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3960   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3961   for (i=0; i<m; i++) {
3962     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3963     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3964     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3965   }
3966   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3968 
3969   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3970   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3971   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3972   sprintf(name,"%s.%d",outfile,rank);
3973   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3974   ierr = PetscFree(name);CHKERRQ(ierr);
3975   ierr = MatView(B,out);CHKERRQ(ierr);
3976   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3977   ierr = MatDestroy(&B);CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3982 #undef __FUNCT__
3983 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3984 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3985 {
3986   PetscErrorCode      ierr;
3987   Mat_Merge_SeqsToMPI *merge;
3988   PetscContainer      container;
3989 
3990   PetscFunctionBegin;
3991   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3992   if (container) {
3993     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4006     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4007     ierr = PetscFree(merge);CHKERRQ(ierr);
4008     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4009   }
4010   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4011   PetscFunctionReturn(0);
4012 }
4013 
4014 #include <../src/mat/utils/freespace.h>
4015 #include <petscbt.h>
4016 
4017 #undef __FUNCT__
4018 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4019 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4020 {
4021   PetscErrorCode      ierr;
4022   MPI_Comm            comm;
4023   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4024   PetscMPIInt         size,rank,taga,*len_s;
4025   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4026   PetscInt            proc,m;
4027   PetscInt            **buf_ri,**buf_rj;
4028   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4029   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4030   MPI_Request         *s_waits,*r_waits;
4031   MPI_Status          *status;
4032   MatScalar           *aa=a->a;
4033   MatScalar           **abuf_r,*ba_i;
4034   Mat_Merge_SeqsToMPI *merge;
4035   PetscContainer      container;
4036 
4037   PetscFunctionBegin;
4038   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4039   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4040 
4041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4042   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4043 
4044   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4045   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4046 
4047   bi     = merge->bi;
4048   bj     = merge->bj;
4049   buf_ri = merge->buf_ri;
4050   buf_rj = merge->buf_rj;
4051 
4052   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4053   owners = merge->rowmap->range;
4054   len_s  = merge->len_s;
4055 
4056   /* send and recv matrix values */
4057   /*-----------------------------*/
4058   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4059   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4060 
4061   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4062   for (proc=0,k=0; proc<size; proc++) {
4063     if (!len_s[proc]) continue;
4064     i    = owners[proc];
4065     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4066     k++;
4067   }
4068 
4069   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4070   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4071   ierr = PetscFree(status);CHKERRQ(ierr);
4072 
4073   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4074   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4075 
4076   /* insert mat values of mpimat */
4077   /*----------------------------*/
4078   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4079   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4080 
4081   for (k=0; k<merge->nrecv; k++) {
4082     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4083     nrows       = *(buf_ri_k[k]);
4084     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4085     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4086   }
4087 
4088   /* set values of ba */
4089   m = merge->rowmap->n;
4090   for (i=0; i<m; i++) {
4091     arow = owners[rank] + i;
4092     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4093     bnzi = bi[i+1] - bi[i];
4094     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4095 
4096     /* add local non-zero vals of this proc's seqmat into ba */
4097     anzi   = ai[arow+1] - ai[arow];
4098     aj     = a->j + ai[arow];
4099     aa     = a->a + ai[arow];
4100     nextaj = 0;
4101     for (j=0; nextaj<anzi; j++) {
4102       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4103         ba_i[j] += aa[nextaj++];
4104       }
4105     }
4106 
4107     /* add received vals into ba */
4108     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4109       /* i-th row */
4110       if (i == *nextrow[k]) {
4111         anzi   = *(nextai[k]+1) - *nextai[k];
4112         aj     = buf_rj[k] + *(nextai[k]);
4113         aa     = abuf_r[k] + *(nextai[k]);
4114         nextaj = 0;
4115         for (j=0; nextaj<anzi; j++) {
4116           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4117             ba_i[j] += aa[nextaj++];
4118           }
4119         }
4120         nextrow[k]++; nextai[k]++;
4121       }
4122     }
4123     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4124   }
4125   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4126   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4127 
4128   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4129   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4130   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4131   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4132   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4133   PetscFunctionReturn(0);
4134 }
4135 
4136 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4137 
4138 #undef __FUNCT__
4139 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4140 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4141 {
4142   PetscErrorCode      ierr;
4143   Mat                 B_mpi;
4144   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4145   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4146   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4147   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4148   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4149   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4150   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4151   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4152   MPI_Status          *status;
4153   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4154   PetscBT             lnkbt;
4155   Mat_Merge_SeqsToMPI *merge;
4156   PetscContainer      container;
4157 
4158   PetscFunctionBegin;
4159   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4160 
4161   /* make sure it is a PETSc comm */
4162   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4163   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4164   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4165 
4166   ierr = PetscNew(&merge);CHKERRQ(ierr);
4167   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4168 
4169   /* determine row ownership */
4170   /*---------------------------------------------------------*/
4171   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4172   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4173   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4174   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4175   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4176   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4177   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4178 
4179   m      = merge->rowmap->n;
4180   owners = merge->rowmap->range;
4181 
4182   /* determine the number of messages to send, their lengths */
4183   /*---------------------------------------------------------*/
4184   len_s = merge->len_s;
4185 
4186   len          = 0; /* length of buf_si[] */
4187   merge->nsend = 0;
4188   for (proc=0; proc<size; proc++) {
4189     len_si[proc] = 0;
4190     if (proc == rank) {
4191       len_s[proc] = 0;
4192     } else {
4193       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4194       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4195     }
4196     if (len_s[proc]) {
4197       merge->nsend++;
4198       nrows = 0;
4199       for (i=owners[proc]; i<owners[proc+1]; i++) {
4200         if (ai[i+1] > ai[i]) nrows++;
4201       }
4202       len_si[proc] = 2*(nrows+1);
4203       len         += len_si[proc];
4204     }
4205   }
4206 
4207   /* determine the number and length of messages to receive for ij-structure */
4208   /*-------------------------------------------------------------------------*/
4209   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4210   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4211 
4212   /* post the Irecv of j-structure */
4213   /*-------------------------------*/
4214   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4215   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4216 
4217   /* post the Isend of j-structure */
4218   /*--------------------------------*/
4219   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4220 
4221   for (proc=0, k=0; proc<size; proc++) {
4222     if (!len_s[proc]) continue;
4223     i    = owners[proc];
4224     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4225     k++;
4226   }
4227 
4228   /* receives and sends of j-structure are complete */
4229   /*------------------------------------------------*/
4230   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4231   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4232 
4233   /* send and recv i-structure */
4234   /*---------------------------*/
4235   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4236   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4237 
4238   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4239   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4240   for (proc=0,k=0; proc<size; proc++) {
4241     if (!len_s[proc]) continue;
4242     /* form outgoing message for i-structure:
4243          buf_si[0]:                 nrows to be sent
4244                [1:nrows]:           row index (global)
4245                [nrows+1:2*nrows+1]: i-structure index
4246     */
4247     /*-------------------------------------------*/
4248     nrows       = len_si[proc]/2 - 1;
4249     buf_si_i    = buf_si + nrows+1;
4250     buf_si[0]   = nrows;
4251     buf_si_i[0] = 0;
4252     nrows       = 0;
4253     for (i=owners[proc]; i<owners[proc+1]; i++) {
4254       anzi = ai[i+1] - ai[i];
4255       if (anzi) {
4256         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4257         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4258         nrows++;
4259       }
4260     }
4261     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4262     k++;
4263     buf_si += len_si[proc];
4264   }
4265 
4266   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4267   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4268 
4269   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4270   for (i=0; i<merge->nrecv; i++) {
4271     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4272   }
4273 
4274   ierr = PetscFree(len_si);CHKERRQ(ierr);
4275   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4276   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4277   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4278   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4279   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4280   ierr = PetscFree(status);CHKERRQ(ierr);
4281 
4282   /* compute a local seq matrix in each processor */
4283   /*----------------------------------------------*/
4284   /* allocate bi array and free space for accumulating nonzero column info */
4285   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4286   bi[0] = 0;
4287 
4288   /* create and initialize a linked list */
4289   nlnk = N+1;
4290   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4291 
4292   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4293   len  = ai[owners[rank+1]] - ai[owners[rank]];
4294   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4295 
4296   current_space = free_space;
4297 
4298   /* determine symbolic info for each local row */
4299   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4300 
4301   for (k=0; k<merge->nrecv; k++) {
4302     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4303     nrows       = *buf_ri_k[k];
4304     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4305     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4306   }
4307 
4308   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4309   len  = 0;
4310   for (i=0; i<m; i++) {
4311     bnzi = 0;
4312     /* add local non-zero cols of this proc's seqmat into lnk */
4313     arow  = owners[rank] + i;
4314     anzi  = ai[arow+1] - ai[arow];
4315     aj    = a->j + ai[arow];
4316     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4317     bnzi += nlnk;
4318     /* add received col data into lnk */
4319     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4320       if (i == *nextrow[k]) { /* i-th row */
4321         anzi  = *(nextai[k]+1) - *nextai[k];
4322         aj    = buf_rj[k] + *nextai[k];
4323         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4324         bnzi += nlnk;
4325         nextrow[k]++; nextai[k]++;
4326       }
4327     }
4328     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4329 
4330     /* if free space is not available, make more free space */
4331     if (current_space->local_remaining<bnzi) {
4332       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4333       nspacedouble++;
4334     }
4335     /* copy data into free space, then initialize lnk */
4336     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4337     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4338 
4339     current_space->array           += bnzi;
4340     current_space->local_used      += bnzi;
4341     current_space->local_remaining -= bnzi;
4342 
4343     bi[i+1] = bi[i] + bnzi;
4344   }
4345 
4346   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4347 
4348   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4349   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4350   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4351 
4352   /* create symbolic parallel matrix B_mpi */
4353   /*---------------------------------------*/
4354   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4355   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4356   if (n==PETSC_DECIDE) {
4357     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4358   } else {
4359     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4360   }
4361   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4362   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4363   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4364   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4365   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4366 
4367   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4368   B_mpi->assembled    = PETSC_FALSE;
4369   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4370   merge->bi           = bi;
4371   merge->bj           = bj;
4372   merge->buf_ri       = buf_ri;
4373   merge->buf_rj       = buf_rj;
4374   merge->coi          = NULL;
4375   merge->coj          = NULL;
4376   merge->owners_co    = NULL;
4377 
4378   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4379 
4380   /* attach the supporting struct to B_mpi for reuse */
4381   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4382   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4383   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4384   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4385   *mpimat = B_mpi;
4386 
4387   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4388   PetscFunctionReturn(0);
4389 }
4390 
4391 #undef __FUNCT__
4392 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4393 /*@C
4394       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4395                  matrices from each processor
4396 
4397     Collective on MPI_Comm
4398 
4399    Input Parameters:
4400 +    comm - the communicators the parallel matrix will live on
4401 .    seqmat - the input sequential matrices
4402 .    m - number of local rows (or PETSC_DECIDE)
4403 .    n - number of local columns (or PETSC_DECIDE)
4404 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4405 
4406    Output Parameter:
4407 .    mpimat - the parallel matrix generated
4408 
4409     Level: advanced
4410 
4411    Notes:
4412      The dimensions of the sequential matrix in each processor MUST be the same.
4413      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4414      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4415 @*/
4416 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4417 {
4418   PetscErrorCode ierr;
4419   PetscMPIInt    size;
4420 
4421   PetscFunctionBegin;
4422   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4423   if (size == 1) {
4424     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4425     if (scall == MAT_INITIAL_MATRIX) {
4426       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4427     } else {
4428       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4429     }
4430     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4431     PetscFunctionReturn(0);
4432   }
4433   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4434   if (scall == MAT_INITIAL_MATRIX) {
4435     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4436   }
4437   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4438   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #undef __FUNCT__
4443 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4444 /*@
4445      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4446           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4447           with MatGetSize()
4448 
4449     Not Collective
4450 
4451    Input Parameters:
4452 +    A - the matrix
4453 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4454 
4455    Output Parameter:
4456 .    A_loc - the local sequential matrix generated
4457 
4458     Level: developer
4459 
4460 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4461 
4462 @*/
4463 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4464 {
4465   PetscErrorCode ierr;
4466   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4467   Mat_SeqAIJ     *mat,*a,*b;
4468   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4469   MatScalar      *aa,*ba,*cam;
4470   PetscScalar    *ca;
4471   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4472   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4473   PetscBool      match;
4474   MPI_Comm       comm;
4475   PetscMPIInt    size;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4479   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4480   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4481   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4482   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4483 
4484   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4485   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4486   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4487   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4488   aa = a->a; ba = b->a;
4489   if (scall == MAT_INITIAL_MATRIX) {
4490     if (size == 1) {
4491       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4492       PetscFunctionReturn(0);
4493     }
4494 
4495     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4496     ci[0] = 0;
4497     for (i=0; i<am; i++) {
4498       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4499     }
4500     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4501     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4502     k    = 0;
4503     for (i=0; i<am; i++) {
4504       ncols_o = bi[i+1] - bi[i];
4505       ncols_d = ai[i+1] - ai[i];
4506       /* off-diagonal portion of A */
4507       for (jo=0; jo<ncols_o; jo++) {
4508         col = cmap[*bj];
4509         if (col >= cstart) break;
4510         cj[k]   = col; bj++;
4511         ca[k++] = *ba++;
4512       }
4513       /* diagonal portion of A */
4514       for (j=0; j<ncols_d; j++) {
4515         cj[k]   = cstart + *aj++;
4516         ca[k++] = *aa++;
4517       }
4518       /* off-diagonal portion of A */
4519       for (j=jo; j<ncols_o; j++) {
4520         cj[k]   = cmap[*bj++];
4521         ca[k++] = *ba++;
4522       }
4523     }
4524     /* put together the new matrix */
4525     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4526     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4527     /* Since these are PETSc arrays, change flags to free them as necessary. */
4528     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4529     mat->free_a  = PETSC_TRUE;
4530     mat->free_ij = PETSC_TRUE;
4531     mat->nonew   = 0;
4532   } else if (scall == MAT_REUSE_MATRIX) {
4533     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4534     ci = mat->i; cj = mat->j; cam = mat->a;
4535     for (i=0; i<am; i++) {
4536       /* off-diagonal portion of A */
4537       ncols_o = bi[i+1] - bi[i];
4538       for (jo=0; jo<ncols_o; jo++) {
4539         col = cmap[*bj];
4540         if (col >= cstart) break;
4541         *cam++ = *ba++; bj++;
4542       }
4543       /* diagonal portion of A */
4544       ncols_d = ai[i+1] - ai[i];
4545       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4546       /* off-diagonal portion of A */
4547       for (j=jo; j<ncols_o; j++) {
4548         *cam++ = *ba++; bj++;
4549       }
4550     }
4551   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4552   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4553   PetscFunctionReturn(0);
4554 }
4555 
4556 #undef __FUNCT__
4557 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4558 /*@C
4559      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4560 
4561     Not Collective
4562 
4563    Input Parameters:
4564 +    A - the matrix
4565 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4566 -    row, col - index sets of rows and columns to extract (or NULL)
4567 
4568    Output Parameter:
4569 .    A_loc - the local sequential matrix generated
4570 
4571     Level: developer
4572 
4573 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4574 
4575 @*/
4576 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4577 {
4578   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4579   PetscErrorCode ierr;
4580   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4581   IS             isrowa,iscola;
4582   Mat            *aloc;
4583   PetscBool      match;
4584 
4585   PetscFunctionBegin;
4586   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4587   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4588   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4589   if (!row) {
4590     start = A->rmap->rstart; end = A->rmap->rend;
4591     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4592   } else {
4593     isrowa = *row;
4594   }
4595   if (!col) {
4596     start = A->cmap->rstart;
4597     cmap  = a->garray;
4598     nzA   = a->A->cmap->n;
4599     nzB   = a->B->cmap->n;
4600     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4601     ncols = 0;
4602     for (i=0; i<nzB; i++) {
4603       if (cmap[i] < start) idx[ncols++] = cmap[i];
4604       else break;
4605     }
4606     imark = i;
4607     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4608     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4609     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4610   } else {
4611     iscola = *col;
4612   }
4613   if (scall != MAT_INITIAL_MATRIX) {
4614     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4615     aloc[0] = *A_loc;
4616   }
4617   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4618   *A_loc = aloc[0];
4619   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4620   if (!row) {
4621     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4622   }
4623   if (!col) {
4624     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4625   }
4626   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4627   PetscFunctionReturn(0);
4628 }
4629 
4630 #undef __FUNCT__
4631 #define __FUNCT__ "MatGetBrowsOfAcols"
4632 /*@C
4633     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4634 
4635     Collective on Mat
4636 
4637    Input Parameters:
4638 +    A,B - the matrices in mpiaij format
4639 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4640 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4641 
4642    Output Parameter:
4643 +    rowb, colb - index sets of rows and columns of B to extract
4644 -    B_seq - the sequential matrix generated
4645 
4646     Level: developer
4647 
4648 @*/
4649 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4650 {
4651   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4652   PetscErrorCode ierr;
4653   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4654   IS             isrowb,iscolb;
4655   Mat            *bseq=NULL;
4656 
4657   PetscFunctionBegin;
4658   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4659     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4660   }
4661   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4662 
4663   if (scall == MAT_INITIAL_MATRIX) {
4664     start = A->cmap->rstart;
4665     cmap  = a->garray;
4666     nzA   = a->A->cmap->n;
4667     nzB   = a->B->cmap->n;
4668     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4669     ncols = 0;
4670     for (i=0; i<nzB; i++) {  /* row < local row index */
4671       if (cmap[i] < start) idx[ncols++] = cmap[i];
4672       else break;
4673     }
4674     imark = i;
4675     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4676     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4677     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4678     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4679   } else {
4680     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4681     isrowb  = *rowb; iscolb = *colb;
4682     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4683     bseq[0] = *B_seq;
4684   }
4685   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4686   *B_seq = bseq[0];
4687   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4688   if (!rowb) {
4689     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4690   } else {
4691     *rowb = isrowb;
4692   }
4693   if (!colb) {
4694     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4695   } else {
4696     *colb = iscolb;
4697   }
4698   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4699   PetscFunctionReturn(0);
4700 }
4701 
4702 #undef __FUNCT__
4703 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4704 /*
4705     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4706     of the OFF-DIAGONAL portion of local A
4707 
4708     Collective on Mat
4709 
4710    Input Parameters:
4711 +    A,B - the matrices in mpiaij format
4712 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4713 
4714    Output Parameter:
4715 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4716 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4717 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4718 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4719 
4720     Level: developer
4721 
4722 */
4723 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4724 {
4725   VecScatter_MPI_General *gen_to,*gen_from;
4726   PetscErrorCode         ierr;
4727   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4728   Mat_SeqAIJ             *b_oth;
4729   VecScatter             ctx =a->Mvctx;
4730   MPI_Comm               comm;
4731   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4732   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4733   PetscScalar            *rvalues,*svalues;
4734   MatScalar              *b_otha,*bufa,*bufA;
4735   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4736   MPI_Request            *rwaits = NULL,*swaits = NULL;
4737   MPI_Status             *sstatus,rstatus;
4738   PetscMPIInt            jj,size;
4739   PetscInt               *cols,sbs,rbs;
4740   PetscScalar            *vals;
4741 
4742   PetscFunctionBegin;
4743   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4744   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4745 
4746   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4747     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4748   }
4749   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4750   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4751 
4752   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4753   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4754   rvalues  = gen_from->values; /* holds the length of receiving row */
4755   svalues  = gen_to->values;   /* holds the length of sending row */
4756   nrecvs   = gen_from->n;
4757   nsends   = gen_to->n;
4758 
4759   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4760   srow    = gen_to->indices;    /* local row index to be sent */
4761   sstarts = gen_to->starts;
4762   sprocs  = gen_to->procs;
4763   sstatus = gen_to->sstatus;
4764   sbs     = gen_to->bs;
4765   rstarts = gen_from->starts;
4766   rprocs  = gen_from->procs;
4767   rbs     = gen_from->bs;
4768 
4769   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4770   if (scall == MAT_INITIAL_MATRIX) {
4771     /* i-array */
4772     /*---------*/
4773     /*  post receives */
4774     for (i=0; i<nrecvs; i++) {
4775       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4776       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4777       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4778     }
4779 
4780     /* pack the outgoing message */
4781     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4782 
4783     sstartsj[0] = 0;
4784     rstartsj[0] = 0;
4785     len         = 0; /* total length of j or a array to be sent */
4786     k           = 0;
4787     for (i=0; i<nsends; i++) {
4788       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4789       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4790       for (j=0; j<nrows; j++) {
4791         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4792         for (l=0; l<sbs; l++) {
4793           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4794 
4795           rowlen[j*sbs+l] = ncols;
4796 
4797           len += ncols;
4798           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4799         }
4800         k++;
4801       }
4802       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4803 
4804       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4805     }
4806     /* recvs and sends of i-array are completed */
4807     i = nrecvs;
4808     while (i--) {
4809       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4810     }
4811     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4812 
4813     /* allocate buffers for sending j and a arrays */
4814     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4815     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4816 
4817     /* create i-array of B_oth */
4818     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4819 
4820     b_othi[0] = 0;
4821     len       = 0; /* total length of j or a array to be received */
4822     k         = 0;
4823     for (i=0; i<nrecvs; i++) {
4824       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4825       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4826       for (j=0; j<nrows; j++) {
4827         b_othi[k+1] = b_othi[k] + rowlen[j];
4828         len        += rowlen[j]; k++;
4829       }
4830       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4831     }
4832 
4833     /* allocate space for j and a arrrays of B_oth */
4834     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4835     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4836 
4837     /* j-array */
4838     /*---------*/
4839     /*  post receives of j-array */
4840     for (i=0; i<nrecvs; i++) {
4841       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4842       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4843     }
4844 
4845     /* pack the outgoing message j-array */
4846     k = 0;
4847     for (i=0; i<nsends; i++) {
4848       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4849       bufJ  = bufj+sstartsj[i];
4850       for (j=0; j<nrows; j++) {
4851         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4852         for (ll=0; ll<sbs; ll++) {
4853           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4854           for (l=0; l<ncols; l++) {
4855             *bufJ++ = cols[l];
4856           }
4857           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4858         }
4859       }
4860       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4861     }
4862 
4863     /* recvs and sends of j-array are completed */
4864     i = nrecvs;
4865     while (i--) {
4866       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4867     }
4868     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4869   } else if (scall == MAT_REUSE_MATRIX) {
4870     sstartsj = *startsj_s;
4871     rstartsj = *startsj_r;
4872     bufa     = *bufa_ptr;
4873     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4874     b_otha   = b_oth->a;
4875   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4876 
4877   /* a-array */
4878   /*---------*/
4879   /*  post receives of a-array */
4880   for (i=0; i<nrecvs; i++) {
4881     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4882     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4883   }
4884 
4885   /* pack the outgoing message a-array */
4886   k = 0;
4887   for (i=0; i<nsends; i++) {
4888     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4889     bufA  = bufa+sstartsj[i];
4890     for (j=0; j<nrows; j++) {
4891       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4892       for (ll=0; ll<sbs; ll++) {
4893         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4894         for (l=0; l<ncols; l++) {
4895           *bufA++ = vals[l];
4896         }
4897         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4898       }
4899     }
4900     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4901   }
4902   /* recvs and sends of a-array are completed */
4903   i = nrecvs;
4904   while (i--) {
4905     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4906   }
4907   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4908   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4909 
4910   if (scall == MAT_INITIAL_MATRIX) {
4911     /* put together the new matrix */
4912     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4913 
4914     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4915     /* Since these are PETSc arrays, change flags to free them as necessary. */
4916     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4917     b_oth->free_a  = PETSC_TRUE;
4918     b_oth->free_ij = PETSC_TRUE;
4919     b_oth->nonew   = 0;
4920 
4921     ierr = PetscFree(bufj);CHKERRQ(ierr);
4922     if (!startsj_s || !bufa_ptr) {
4923       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4924       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4925     } else {
4926       *startsj_s = sstartsj;
4927       *startsj_r = rstartsj;
4928       *bufa_ptr  = bufa;
4929     }
4930   }
4931   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 #undef __FUNCT__
4936 #define __FUNCT__ "MatGetCommunicationStructs"
4937 /*@C
4938   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4939 
4940   Not Collective
4941 
4942   Input Parameters:
4943 . A - The matrix in mpiaij format
4944 
4945   Output Parameter:
4946 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4947 . colmap - A map from global column index to local index into lvec
4948 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4949 
4950   Level: developer
4951 
4952 @*/
4953 #if defined(PETSC_USE_CTABLE)
4954 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4955 #else
4956 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4957 #endif
4958 {
4959   Mat_MPIAIJ *a;
4960 
4961   PetscFunctionBegin;
4962   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4963   PetscValidPointer(lvec, 2);
4964   PetscValidPointer(colmap, 3);
4965   PetscValidPointer(multScatter, 4);
4966   a = (Mat_MPIAIJ*) A->data;
4967   if (lvec) *lvec = a->lvec;
4968   if (colmap) *colmap = a->colmap;
4969   if (multScatter) *multScatter = a->Mvctx;
4970   PetscFunctionReturn(0);
4971 }
4972 
4973 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4974 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4975 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4976 #if defined(PETSC_HAVE_ELEMENTAL)
4977 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4978 #endif
4979 
4980 #undef __FUNCT__
4981 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4982 /*
4983     Computes (B'*A')' since computing B*A directly is untenable
4984 
4985                n                       p                          p
4986         (              )       (              )         (                  )
4987       m (      A       )  *  n (       B      )   =   m (         C        )
4988         (              )       (              )         (                  )
4989 
4990 */
4991 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4992 {
4993   PetscErrorCode ierr;
4994   Mat            At,Bt,Ct;
4995 
4996   PetscFunctionBegin;
4997   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4998   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4999   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5000   ierr = MatDestroy(&At);CHKERRQ(ierr);
5001   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5002   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5003   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 #undef __FUNCT__
5008 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5009 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5010 {
5011   PetscErrorCode ierr;
5012   PetscInt       m=A->rmap->n,n=B->cmap->n;
5013   Mat            Cmat;
5014 
5015   PetscFunctionBegin;
5016   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5017   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5018   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5019   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5020   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5021   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5022   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5023   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5024 
5025   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5026 
5027   *C = Cmat;
5028   PetscFunctionReturn(0);
5029 }
5030 
5031 /* ----------------------------------------------------------------*/
5032 #undef __FUNCT__
5033 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5034 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5035 {
5036   PetscErrorCode ierr;
5037 
5038   PetscFunctionBegin;
5039   if (scall == MAT_INITIAL_MATRIX) {
5040     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5041     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5042     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5043   }
5044   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5045   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5046   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5047   PetscFunctionReturn(0);
5048 }
5049 
5050 /*MC
5051    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5052 
5053    Options Database Keys:
5054 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5055 
5056   Level: beginner
5057 
5058 .seealso: MatCreateAIJ()
5059 M*/
5060 
5061 #undef __FUNCT__
5062 #define __FUNCT__ "MatCreate_MPIAIJ"
5063 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5064 {
5065   Mat_MPIAIJ     *b;
5066   PetscErrorCode ierr;
5067   PetscMPIInt    size;
5068 
5069   PetscFunctionBegin;
5070   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5071 
5072   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5073   B->data       = (void*)b;
5074   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5075   B->assembled  = PETSC_FALSE;
5076   B->insertmode = NOT_SET_VALUES;
5077   b->size       = size;
5078 
5079   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5080 
5081   /* build cache for off array entries formed */
5082   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5083 
5084   b->donotstash  = PETSC_FALSE;
5085   b->colmap      = 0;
5086   b->garray      = 0;
5087   b->roworiented = PETSC_TRUE;
5088 
5089   /* stuff used for matrix vector multiply */
5090   b->lvec  = NULL;
5091   b->Mvctx = NULL;
5092 
5093   /* stuff for MatGetRow() */
5094   b->rowindices   = 0;
5095   b->rowvalues    = 0;
5096   b->getrowactive = PETSC_FALSE;
5097 
5098   /* flexible pointer used in CUSP/CUSPARSE classes */
5099   b->spptr = NULL;
5100 
5101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5111 #if defined(PETSC_HAVE_ELEMENTAL)
5112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5113 #endif
5114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5118   PetscFunctionReturn(0);
5119 }
5120 
5121 #undef __FUNCT__
5122 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5123 /*@C
5124      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5125          and "off-diagonal" part of the matrix in CSR format.
5126 
5127    Collective on MPI_Comm
5128 
5129    Input Parameters:
5130 +  comm - MPI communicator
5131 .  m - number of local rows (Cannot be PETSC_DECIDE)
5132 .  n - This value should be the same as the local size used in creating the
5133        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5134        calculated if N is given) For square matrices n is almost always m.
5135 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5136 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5137 .   i - row indices for "diagonal" portion of matrix
5138 .   j - column indices
5139 .   a - matrix values
5140 .   oi - row indices for "off-diagonal" portion of matrix
5141 .   oj - column indices
5142 -   oa - matrix values
5143 
5144    Output Parameter:
5145 .   mat - the matrix
5146 
5147    Level: advanced
5148 
5149    Notes:
5150        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5151        must free the arrays once the matrix has been destroyed and not before.
5152 
5153        The i and j indices are 0 based
5154 
5155        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5156 
5157        This sets local rows and cannot be used to set off-processor values.
5158 
5159        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5160        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5161        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5162        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5163        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5164        communication if it is known that only local entries will be set.
5165 
5166 .keywords: matrix, aij, compressed row, sparse, parallel
5167 
5168 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5169           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5170 @*/
5171 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5172 {
5173   PetscErrorCode ierr;
5174   Mat_MPIAIJ     *maij;
5175 
5176   PetscFunctionBegin;
5177   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5178   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5179   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5180   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5181   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5182   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5183   maij = (Mat_MPIAIJ*) (*mat)->data;
5184 
5185   (*mat)->preallocated = PETSC_TRUE;
5186 
5187   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5188   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5189 
5190   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5191   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5192 
5193   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5196   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197 
5198   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5201   PetscFunctionReturn(0);
5202 }
5203 
5204 /*
5205     Special version for direct calls from Fortran
5206 */
5207 #include <petsc/private/fortranimpl.h>
5208 
5209 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5210 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5211 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5212 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5213 #endif
5214 
5215 /* Change these macros so can be used in void function */
5216 #undef CHKERRQ
5217 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5218 #undef SETERRQ2
5219 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5220 #undef SETERRQ3
5221 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5222 #undef SETERRQ
5223 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5224 
5225 #undef __FUNCT__
5226 #define __FUNCT__ "matsetvaluesmpiaij_"
5227 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5228 {
5229   Mat            mat  = *mmat;
5230   PetscInt       m    = *mm, n = *mn;
5231   InsertMode     addv = *maddv;
5232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5233   PetscScalar    value;
5234   PetscErrorCode ierr;
5235 
5236   MatCheckPreallocated(mat,1);
5237   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5238 
5239 #if defined(PETSC_USE_DEBUG)
5240   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5241 #endif
5242   {
5243     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5244     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5245     PetscBool roworiented = aij->roworiented;
5246 
5247     /* Some Variables required in the macro */
5248     Mat        A                 = aij->A;
5249     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5250     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5251     MatScalar  *aa               = a->a;
5252     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5253     Mat        B                 = aij->B;
5254     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5255     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5256     MatScalar  *ba               = b->a;
5257 
5258     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5259     PetscInt  nonew = a->nonew;
5260     MatScalar *ap1,*ap2;
5261 
5262     PetscFunctionBegin;
5263     for (i=0; i<m; i++) {
5264       if (im[i] < 0) continue;
5265 #if defined(PETSC_USE_DEBUG)
5266       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5267 #endif
5268       if (im[i] >= rstart && im[i] < rend) {
5269         row      = im[i] - rstart;
5270         lastcol1 = -1;
5271         rp1      = aj + ai[row];
5272         ap1      = aa + ai[row];
5273         rmax1    = aimax[row];
5274         nrow1    = ailen[row];
5275         low1     = 0;
5276         high1    = nrow1;
5277         lastcol2 = -1;
5278         rp2      = bj + bi[row];
5279         ap2      = ba + bi[row];
5280         rmax2    = bimax[row];
5281         nrow2    = bilen[row];
5282         low2     = 0;
5283         high2    = nrow2;
5284 
5285         for (j=0; j<n; j++) {
5286           if (roworiented) value = v[i*n+j];
5287           else value = v[i+j*m];
5288           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5289           if (in[j] >= cstart && in[j] < cend) {
5290             col = in[j] - cstart;
5291             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5292           } else if (in[j] < 0) continue;
5293 #if defined(PETSC_USE_DEBUG)
5294           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5295 #endif
5296           else {
5297             if (mat->was_assembled) {
5298               if (!aij->colmap) {
5299                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5300               }
5301 #if defined(PETSC_USE_CTABLE)
5302               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5303               col--;
5304 #else
5305               col = aij->colmap[in[j]] - 1;
5306 #endif
5307               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5308                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5309                 col  =  in[j];
5310                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5311                 B     = aij->B;
5312                 b     = (Mat_SeqAIJ*)B->data;
5313                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5314                 rp2   = bj + bi[row];
5315                 ap2   = ba + bi[row];
5316                 rmax2 = bimax[row];
5317                 nrow2 = bilen[row];
5318                 low2  = 0;
5319                 high2 = nrow2;
5320                 bm    = aij->B->rmap->n;
5321                 ba    = b->a;
5322               }
5323             } else col = in[j];
5324             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5325           }
5326         }
5327       } else if (!aij->donotstash) {
5328         if (roworiented) {
5329           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5330         } else {
5331           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5332         }
5333       }
5334     }
5335   }
5336   PetscFunctionReturnVoid();
5337 }
5338 
5339