xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3aa2d9e3a17455108487be9a174c0f069d9014ad)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
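
   Example:
   A minimal usage sketch (the global size N and the preallocation counts are hypothetical);
   calling both preallocation routines lets the same code run on one or many processes, since
   the routine that does not match the actual type is ignored:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
   after which entries are inserted with MatSetValues() and the matrix is assembled with
   MatAssemblyBegin()/MatAssemblyEnd().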
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; this type also automatically switches over to use inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
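
   Example:
   A common way to obtain this format is to call MatSetFromOptions() on the matrix and select
   the type at run time; the executable name below is only a placeholder:
.vb
   mpiexec -n 4 ./myprog -mat_type aijcrl
.ve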
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
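/*
   A hedged sketch of a call (the names gseq, mlocal, and dist are assumptions made only for
   illustration; gseq is the SeqAIJ matrix to be spread across the communicator and mlocal the
   desired number of local rows):

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);

   A second call with MAT_REUSE_MATRIX and the same dist only moves new numerical values over
   from process 0.
*/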
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 stores an order N integer array) but access is fast.
403 */
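/*
   Sketch of the lookup convention used elsewhere in this file: values are stored shifted by
   one, so a lookup result of zero means "global column not present in the off-diagonal block".
   For a hypothetical global column index gcol:

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   so lcol < 0 indicates that gcol does not appear in the off-diagonal block B.
*/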
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
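/*
   The two macros below insert a single value into the local diagonal block A
   (MatSetValues_SeqAIJ_A_Private) or the off-diagonal block B (MatSetValues_SeqAIJ_B_Private).
   Each does a short binary search followed by a linear scan within the row, adds to or
   overwrites the value if the column is already present, and otherwise (honoring the nonew and
   ignorezeroentries flags) reallocates the row with MatSeqXAIJReallocateAIJ() and shifts later
   entries up to make room for the new nonzero.
*/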
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if the nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled, and thus we can skip this step
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
837     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
838   } else if (diag != 0.0) {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
844     }
845     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   } else {
848     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
849   }
850   ierr = PetscFree(lrows);CHKERRQ(ierr);
851 
852   /* only change matrix nonzero state if pattern was allowed to be changed */
853   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
854     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
855     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
865   PetscErrorCode    ierr;
866   PetscMPIInt       n = A->rmap->n;
867   PetscInt          i,j,r,m,p = 0,len = 0;
868   PetscInt          *lrows,*owners = A->rmap->range;
869   PetscSFNode       *rrows;
870   PetscSF           sf;
871   const PetscScalar *xx;
872   PetscScalar       *bb,*mask;
873   Vec               xmask,lmask;
874   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
875   const PetscInt    *aj, *ii,*ridx;
876   PetscScalar       *aa;
877 
878   PetscFunctionBegin;
879   /* Create SF where leaves are input rows and roots are owned rows */
880   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
881   for (r = 0; r < n; ++r) lrows[r] = -1;
882   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
883   for (r = 0; r < N; ++r) {
884     const PetscInt idx   = rows[r];
885     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
886     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
887       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
888     }
889     rrows[r].rank  = p;
890     rrows[r].index = rows[r] - owners[p];
891   }
892   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
893   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
894   /* Collect flags for rows to be zeroed */
895   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
896   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
897   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
898   /* Compress and put in row numbers */
899   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
900   /* zero diagonal part of matrix */
901   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
902   /* handle off diagonal part of matrix */
903   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
904   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
905   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
906   for (i=0; i<len; i++) bb[lrows[i]] = 1;
907   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
908   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
911   if (x) {
912     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
916   }
917   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
918   /* remove zeroed rows of off diagonal matrix */
919   ii = aij->i;
920   for (i=0; i<len; i++) {
921     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
922   }
923   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
924   if (aij->compressedrow.use) {
925     m    = aij->compressedrow.nrows;
926     ii   = aij->compressedrow.i;
927     ridx = aij->compressedrow.rindex;
928     for (i=0; i<m; i++) {
929       n  = ii[i+1] - ii[i];
930       aj = aij->j + ii[i];
931       aa = aij->a + ii[i];
932 
933       for (j=0; j<n; j++) {
934         if (PetscAbsScalar(mask[*aj])) {
935           if (b) bb[*ridx] -= *aa*xx[*aj];
936           *aa = 0.0;
937         }
938         aa++;
939         aj++;
940       }
941       ridx++;
942     }
943   } else { /* do not use compressed row format */
944     m = l->B->rmap->n;
945     for (i=0; i<m; i++) {
946       n  = ii[i+1] - ii[i];
947       aj = aij->j + ii[i];
948       aa = aij->a + ii[i];
949       for (j=0; j<n; j++) {
950         if (PetscAbsScalar(mask[*aj])) {
951           if (b) bb[i] -= *aa*xx[*aj];
952           *aa = 0.0;
953         }
954         aa++;
955         aj++;
956       }
957     }
958   }
959   if (x) {
960     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
961     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962   }
963   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
964   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
965   ierr = PetscFree(lrows);CHKERRQ(ierr);
966 
967   /* only change matrix nonzero state if pattern was allowed to be changed */
968   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
969     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
970     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMult_MPIAIJ"
977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscInt       nt;
982 
983   PetscFunctionBegin;
984   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
985   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1008 {
1009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscBool      merged;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1030   /* do nondiagonal part */
1031   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1032   if (!merged) {
1033     /* send it on its way */
1034     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035     /* do local part */
1036     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1037     /* receive remote parts: note this assumes the values are not actually */
1038     /* added into yy until the next line */
1039     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1040   } else {
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* values actually were received in the Begin() but we need to call this nop */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1053 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1054 {
1055   MPI_Comm       comm;
1056   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1057   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1058   IS             Me,Notme;
1059   PetscErrorCode ierr;
1060   PetscInt       M,N,first,last,*notme,i;
1061   PetscMPIInt    size;
1062 
1063   PetscFunctionBegin;
1064   /* Easy test: symmetric diagonal block */
1065   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1066   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1067   if (!*f) PetscFunctionReturn(0);
1068   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1070   if (size == 1) PetscFunctionReturn(0);
1071 
1072   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1073   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1074   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1075   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1076   for (i=0; i<first; i++) notme[i] = i;
1077   for (i=last; i<M; i++) notme[i-last+first] = i;
1078   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1079   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1080   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1081   Aoff = Aoffs[0];
1082   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1083   Boff = Boffs[0];
1084   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1086   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1088   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1089   ierr = PetscFree(notme);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   /* do nondiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1103   /* send it on its way */
1104   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1105   /* do local part */
1106   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   /* receive remote parts */
1108   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatDestroy_MPIAIJ"
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1167   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1168   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1169 
1170   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* this process needs as much space as the largest process needs */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
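/*
   The binary file produced above has the layout
     [MAT_FILE_CLASSID, M, N, total nz] [row lengths] [global column indices] [nonzero values]
   with the per-process pieces written in rank order by process 0.  A minimal sketch of
   triggering this path (assuming an assembled MPIAIJ matrix A and a hypothetical file name
   "A.dat"):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);               /* dispatches here on more than one process */
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/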
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1386   }
1387 
1388   {
1389     /* assemble the entire matrix onto first processor. */
1390     Mat        A;
1391     Mat_SeqAIJ *Aloc;
1392     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1393     MatScalar  *a;
1394 
1395     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1396     if (!rank) {
1397       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1398     } else {
1399       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1400     }
1401     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1402     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1403     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1404     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1405     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1406 
1407     /* copy over the A part */
1408     Aloc = (Mat_SeqAIJ*)aij->A->data;
1409     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1410     row  = mat->rmap->rstart;
1411     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1412     for (i=0; i<m; i++) {
1413       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1414       row++;
1415       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1416     }
1417     aj = Aloc->j;
1418     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1419 
1420     /* copy over the B part */
1421     Aloc = (Mat_SeqAIJ*)aij->B->data;
1422     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1423     row  = mat->rmap->rstart;
1424     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1425     ct   = cols;
1426     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1427     for (i=0; i<m; i++) {
1428       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1429       row++;
1430       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1431     }
1432     ierr = PetscFree(ct);CHKERRQ(ierr);
1433     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1434     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     /*
1436        Every process has to call this to draw the matrix since the graphics waits are
1437        synchronized across all processes that share the PetscDraw object
1438     */
1439     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1440     if (!rank) {
1441       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1442       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1445     ierr = MatDestroy(&A);CHKERRQ(ierr);
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatView_MPIAIJ"
1452 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1453 {
1454   PetscErrorCode ierr;
1455   PetscBool      iascii,isdraw,issocket,isbinary;
1456 
1457   PetscFunctionBegin;
1458   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1462   if (iascii || isdraw || isbinary || issocket) {
1463     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
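/*
   A minimal sketch of requesting the detailed ASCII information printed by
   MatView_MPIAIJ_ASCIIorDraworSocket() (assuming an assembled matrix A):

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, from the command line, the option -mat_view ::ascii_info_detail.
*/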
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSOR_MPIAIJ"
1470 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1471 {
1472   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1473   PetscErrorCode ierr;
1474   Vec            bb1 = 0;
1475   PetscBool      hasop;
1476 
1477   PetscFunctionBegin;
1478   if (flag == SOR_APPLY_UPPER) {
1479     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1480     PetscFunctionReturn(0);
1481   }
1482 
1483   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1484     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1485   }
1486 
1487   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1488     if (flag & SOR_ZERO_INITIAL_GUESS) {
1489       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490       its--;
1491     }
1492 
1493     while (its--) {
1494       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496 
1497       /* update rhs: bb1 = bb - B*x */
1498       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1499       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1500 
1501       /* local sweep */
1502       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1503     }
1504   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1505     if (flag & SOR_ZERO_INITIAL_GUESS) {
1506       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1507       its--;
1508     }
1509     while (its--) {
1510       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512 
1513       /* update rhs: bb1 = bb - B*x */
1514       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1515       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1516 
1517       /* local sweep */
1518       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1519     }
1520   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1521     if (flag & SOR_ZERO_INITIAL_GUESS) {
1522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1523       its--;
1524     }
1525     while (its--) {
1526       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1527       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528 
1529       /* update rhs: bb1 = bb - B*x */
1530       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1531       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1532 
1533       /* local sweep */
1534       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1535     }
1536   } else if (flag & SOR_EISENSTAT) {
1537     Vec xx1;
1538 
1539     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1540     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1541 
1542     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1543     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     if (!mat->diag) {
1545       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1546       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1547     }
1548     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1549     if (hasop) {
1550       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1551     } else {
1552       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1553     }
1554     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1555 
1556     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1557 
1558     /* local sweep */
1559     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1560     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1561     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1562   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1563 
1564   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
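/*
   The parallel SOR above is process-local: each sweep first updates the right-hand side with the
   off-diagonal block, bb1 = bb - B*x_off, and then applies SOR to the local diagonal block A, so
   the global method is block Jacobi with local SOR sweeps.  A minimal sketch of exercising it
   through a solver (assuming Mat A and Vecs x,b with conforming layouts):

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);   /* PCSOR ends up calling MatSOR() with the SOR_LOCAL_* flags in parallel */
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/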
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatPermute_MPIAIJ"
1570 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1571 {
1572   Mat            aA,aB,Aperm;
1573   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1574   PetscScalar    *aa,*ba;
1575   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1576   PetscSF        rowsf,sf;
1577   IS             parcolp = NULL;
1578   PetscBool      done;
1579   PetscErrorCode ierr;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1583   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1584   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1585   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1586 
1587   /* Invert row permutation to find out where my rows should go */
1588   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1589   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1590   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1591   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1592   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1594 
1595   /* Invert column permutation to find out where my columns should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1599   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1600   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1602   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1603 
1604   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1605   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1606   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1607 
1608   /* Find out where my gcols should go */
1609   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1610   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1611   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1612   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1613   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1614   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1615   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1616   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1617 
1618   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1619   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1620   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1621   for (i=0; i<m; i++) {
1622     PetscInt row = rdest[i],rowner;
1623     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1624     for (j=ai[i]; j<ai[i+1]; j++) {
1625       PetscInt cowner,col = cdest[aj[j]];
1626       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1627       if (rowner == cowner) dnnz[i]++;
1628       else onnz[i]++;
1629     }
1630     for (j=bi[i]; j<bi[i+1]; j++) {
1631       PetscInt cowner,col = gcdest[bj[j]];
1632       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1633       if (rowner == cowner) dnnz[i]++;
1634       else onnz[i]++;
1635     }
1636   }
1637   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1638   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1639   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1642 
1643   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1644   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1645   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1646   for (i=0; i<m; i++) {
1647     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1648     PetscInt j0,rowlen;
1649     rowlen = ai[i+1] - ai[i];
1650     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches of at most m */
1651       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1652       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1653     }
1654     rowlen = bi[i+1] - bi[i];
1655     for (j0=j=0; j<rowlen; j0=j) {
1656       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1657       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1658     }
1659   }
1660   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1661   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1663   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1664   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1665   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1666   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1667   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1668   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1669   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1670   *B = Aperm;
1671   PetscFunctionReturn(0);
1672 }
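/*
   A minimal usage sketch for the permutation above (assuming a square MPIAIJ matrix A; here the
   identity permutation is used on both rows and columns, which simply copies the matrix):

     IS       rowp,colp;
     Mat      B;
     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/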
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1676 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1677 {
1678   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1679   PetscErrorCode ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1683   if (ghosts) *ghosts = aij->garray;
1684   PetscFunctionReturn(0);
1685 }
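/*
   A minimal sketch of querying the ghost (off-process) columns exposed above (assuming an
   assembled MPIAIJ matrix A):

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);  /* ghosts[] holds the global column indices of the off-diagonal part */
*/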
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1689 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1690 {
1691   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1692   Mat            A    = mat->A,B = mat->B;
1693   PetscErrorCode ierr;
1694   PetscReal      isend[5],irecv[5];
1695 
1696   PetscFunctionBegin;
1697   info->block_size = 1.0;
1698   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1699 
1700   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1701   isend[3] = info->memory;  isend[4] = info->mallocs;
1702 
1703   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1704 
1705   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1706   isend[3] += info->memory;  isend[4] += info->mallocs;
1707   if (flag == MAT_LOCAL) {
1708     info->nz_used      = isend[0];
1709     info->nz_allocated = isend[1];
1710     info->nz_unneeded  = isend[2];
1711     info->memory       = isend[3];
1712     info->mallocs      = isend[4];
1713   } else if (flag == MAT_GLOBAL_MAX) {
1714     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1715 
1716     info->nz_used      = irecv[0];
1717     info->nz_allocated = irecv[1];
1718     info->nz_unneeded  = irecv[2];
1719     info->memory       = irecv[3];
1720     info->mallocs      = irecv[4];
1721   } else if (flag == MAT_GLOBAL_SUM) {
1722     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1723 
1724     info->nz_used      = irecv[0];
1725     info->nz_allocated = irecv[1];
1726     info->nz_unneeded  = irecv[2];
1727     info->memory       = irecv[3];
1728     info->mallocs      = irecv[4];
1729   }
1730   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1731   info->fill_ratio_needed = 0;
1732   info->factor_mallocs    = 0;
1733   PetscFunctionReturn(0);
1734 }
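/*
   A minimal sketch of querying the information assembled above (assuming an assembled matrix A):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/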
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatSetOption_MPIAIJ"
1738 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1739 {
1740   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1741   PetscErrorCode ierr;
1742 
1743   PetscFunctionBegin;
1744   switch (op) {
1745   case MAT_NEW_NONZERO_LOCATIONS:
1746   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1747   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1748   case MAT_KEEP_NONZERO_PATTERN:
1749   case MAT_NEW_NONZERO_LOCATION_ERR:
1750   case MAT_USE_INODES:
1751   case MAT_IGNORE_ZERO_ENTRIES:
1752     MatCheckPreallocated(A,1);
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_ROW_ORIENTED:
1757     a->roworiented = flg;
1758 
1759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1760     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_NEW_DIAGONALS:
1763     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1764     break;
1765   case MAT_IGNORE_OFF_PROC_ENTRIES:
1766     a->donotstash = flg;
1767     break;
1768   case MAT_SPD:
1769     A->spd_set = PETSC_TRUE;
1770     A->spd     = flg;
1771     if (flg) {
1772       A->symmetric                  = PETSC_TRUE;
1773       A->structurally_symmetric     = PETSC_TRUE;
1774       A->symmetric_set              = PETSC_TRUE;
1775       A->structurally_symmetric_set = PETSC_TRUE;
1776     }
1777     break;
1778   case MAT_SYMMETRIC:
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_STRUCTURALLY_SYMMETRIC:
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   case MAT_HERMITIAN:
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   case MAT_SYMMETRY_ETERNAL:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     break;
1790   default:
1791     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1792   }
1793   PetscFunctionReturn(0);
1794 }
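/*
   A minimal sketch of setting one of the options handled above (assuming a matrix A that will be
   assembled using only locally owned entries):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* sets a->donotstash, skipping the stash communication */
*/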
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatGetRow_MPIAIJ"
1798 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1799 {
1800   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1801   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1802   PetscErrorCode ierr;
1803   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1804   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1805   PetscInt       *cmap,*idx_p;
1806 
1807   PetscFunctionBegin;
1808   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1809   mat->getrowactive = PETSC_TRUE;
1810 
1811   if (!mat->rowvalues && (idx || v)) {
1812     /*
1813         allocate enough space to hold information from the longest row.
1814     */
1815     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1816     PetscInt   max = 1,tmp;
1817     for (i=0; i<matin->rmap->n; i++) {
1818       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1819       if (max < tmp) max = tmp;
1820     }
1821     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1822   }
1823 
1824   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1825   lrow = row - rstart;
1826 
1827   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1828   if (!v)   {pvA = 0; pvB = 0;}
1829   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1830   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1831   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1832   nztot = nzA + nzB;
1833 
1834   cmap = mat->garray;
1835   if (v  || idx) {
1836     if (nztot) {
1837       /* Sort by increasing column numbers, assuming A and B already sorted */
1838       PetscInt imark = -1;
1839       if (v) {
1840         *v = v_p = mat->rowvalues;
1841         for (i=0; i<nzB; i++) {
1842           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1843           else break;
1844         }
1845         imark = i;
1846         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1847         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1848       }
1849       if (idx) {
1850         *idx = idx_p = mat->rowindices;
1851         if (imark > -1) {
1852           for (i=0; i<imark; i++) {
1853             idx_p[i] = cmap[cworkB[i]];
1854           }
1855         } else {
1856           for (i=0; i<nzB; i++) {
1857             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1858             else break;
1859           }
1860           imark = i;
1861         }
1862         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1863         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1864       }
1865     } else {
1866       if (idx) *idx = 0;
1867       if (v)   *v   = 0;
1868     }
1869   }
1870   *nz  = nztot;
1871   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1872   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
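/*
   A minimal sketch of walking the locally owned rows through the routine above (assuming an
   assembled matrix A):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use the merged, column-sorted row ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/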
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1878 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1879 {
1880   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1881 
1882   PetscFunctionBegin;
1883   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1884   aij->getrowactive = PETSC_FALSE;
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatNorm_MPIAIJ"
1890 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1891 {
1892   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1893   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1894   PetscErrorCode ierr;
1895   PetscInt       i,j,cstart = mat->cmap->rstart;
1896   PetscReal      sum = 0.0;
1897   MatScalar      *v;
1898 
1899   PetscFunctionBegin;
1900   if (aij->size == 1) {
1901     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1902   } else {
1903     if (type == NORM_FROBENIUS) {
1904       v = amat->a;
1905       for (i=0; i<amat->nz; i++) {
1906         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1907       }
1908       v = bmat->a;
1909       for (i=0; i<bmat->nz; i++) {
1910         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1911       }
1912       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1913       *norm = PetscSqrtReal(*norm);
1914     } else if (type == NORM_1) { /* max column norm */
1915       PetscReal *tmp,*tmp2;
1916       PetscInt  *jj,*garray = aij->garray;
1917       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1918       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1919       *norm = 0.0;
1920       v     = amat->a; jj = amat->j;
1921       for (j=0; j<amat->nz; j++) {
1922         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1923       }
1924       v = bmat->a; jj = bmat->j;
1925       for (j=0; j<bmat->nz; j++) {
1926         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1927       }
1928       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1929       for (j=0; j<mat->cmap->N; j++) {
1930         if (tmp2[j] > *norm) *norm = tmp2[j];
1931       }
1932       ierr = PetscFree(tmp);CHKERRQ(ierr);
1933       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1934     } else if (type == NORM_INFINITY) { /* max row norm */
1935       PetscReal ntemp = 0.0;
1936       for (j=0; j<aij->A->rmap->n; j++) {
1937         v   = amat->a + amat->i[j];
1938         sum = 0.0;
1939         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1940           sum += PetscAbsScalar(*v); v++;
1941         }
1942         v = bmat->a + bmat->i[j];
1943         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1944           sum += PetscAbsScalar(*v); v++;
1945         }
1946         if (sum > ntemp) ntemp = sum;
1947       }
1948       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1949     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1950   }
1951   PetscFunctionReturn(0);
1952 }
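/*
   For an m x n matrix the norms computed above are
     NORM_FROBENIUS: sqrt(sum_{i,j} |a_ij|^2)
     NORM_1:         max_j sum_i |a_ij|   (largest column sum)
     NORM_INFINITY:  max_i sum_j |a_ij|   (largest row sum)
   A minimal usage sketch:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/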
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatTranspose_MPIAIJ"
1956 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1957 {
1958   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1959   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1960   PetscErrorCode ierr;
1961   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1962   PetscInt       cstart = A->cmap->rstart,ncol;
1963   Mat            B;
1964   MatScalar      *array;
1965 
1966   PetscFunctionBegin;
1967   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1968 
1969   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1970   ai = Aloc->i; aj = Aloc->j;
1971   bi = Bloc->i; bj = Bloc->j;
1972   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1973     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1974     PetscSFNode          *oloc;
1975     PETSC_UNUSED PetscSF sf;
1976 
1977     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1978     /* compute d_nnz for preallocation */
1979     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1980     for (i=0; i<ai[ma]; i++) {
1981       d_nnz[aj[i]]++;
1982       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1983     }
1984     /* compute local off-diagonal contributions */
1985     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1986     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1987     /* map those to global */
1988     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1989     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1990     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1991     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1992     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1993     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1994     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1995 
1996     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1997     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1998     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1999     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2000     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2001     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2002   } else {
2003     B    = *matout;
2004     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2005     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2006   }
2007 
2008   /* copy over the A part */
2009   array = Aloc->a;
2010   row   = A->rmap->rstart;
2011   for (i=0; i<ma; i++) {
2012     ncol = ai[i+1]-ai[i];
2013     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2014     row++;
2015     array += ncol; aj += ncol;
2016   }
2017   aj = Aloc->j;
2018   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2019 
2020   /* copy over the B part */
2021   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2022   array = Bloc->a;
2023   row   = A->rmap->rstart;
2024   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2025   cols_tmp = cols;
2026   for (i=0; i<mb; i++) {
2027     ncol = bi[i+1]-bi[i];
2028     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2029     row++;
2030     array += ncol; cols_tmp += ncol;
2031   }
2032   ierr = PetscFree(cols);CHKERRQ(ierr);
2033 
2034   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2035   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2036   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2037     *matout = B;
2038   } else {
2039     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
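/*
   A minimal usage sketch for the transpose above (assuming an assembled matrix A):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/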
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2046 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2047 {
2048   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2049   Mat            a    = aij->A,b = aij->B;
2050   PetscErrorCode ierr;
2051   PetscInt       s1,s2,s3;
2052 
2053   PetscFunctionBegin;
2054   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2055   if (rr) {
2056     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2057     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2058     /* Overlap communication with computation. */
2059     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2060   }
2061   if (ll) {
2062     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2063     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2064     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2065   }
2066   /* scale the diagonal block */
2067   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2068 
2069   if (rr) {
2070     /* Do a scatter end and then right scale the off-diagonal block */
2071     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2073   }
2074   PetscFunctionReturn(0);
2075 }
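/*
   MatDiagonalScale() computes A = diag(ll)*A*diag(rr); only the off-diagonal block needs the
   scattered (ghosted) entries of rr, which is why the scatter above is overlapped with scaling
   the diagonal block.  A minimal usage sketch (assuming Vecs l and r conforming to the rows and
   columns of A respectively; either may be NULL to scale on one side only):

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/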
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2079 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2080 {
2081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2082   PetscErrorCode ierr;
2083 
2084   PetscFunctionBegin;
2085   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatEqual_MPIAIJ"
2091 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2092 {
2093   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2094   Mat            a,b,c,d;
2095   PetscBool      flg;
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   a = matA->A; b = matA->B;
2100   c = matB->A; d = matB->B;
2101 
2102   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2103   if (flg) {
2104     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2105   }
2106   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatCopy_MPIAIJ"
2112 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2113 {
2114   PetscErrorCode ierr;
2115   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2116   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2117 
2118   PetscFunctionBegin;
2119   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2120   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2121     /* because of the column compression in the off-processor part of the matrix a->B,
2122        the number of columns in a->B and b->B may be different, hence we cannot call
2123        MatCopy() directly on the two parts. If need be, a copy more efficient than
2124        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2125        then copying the submatrices */
2126     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2127   } else {
2128     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2129     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2130   }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatSetUp_MPIAIJ"
2136 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2137 {
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 /*
2146    Computes the number of nonzeros per row needed for preallocation when X and Y
2147    have different nonzero structure.
2148 */
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2151 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2152 {
2153   PetscInt       i,j,k,nzx,nzy;
2154 
2155   PetscFunctionBegin;
2156   /* Set the number of nonzeros in the new matrix */
2157   for (i=0; i<m; i++) {
2158     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2159     nzx = xi[i+1] - xi[i];
2160     nzy = yi[i+1] - yi[i];
2161     nnz[i] = 0;
2162     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2163       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2164       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2165       nnz[i]++;
2166     }
2167     for (; k<nzy; k++) nnz[i]++;
2168   }
2169   PetscFunctionReturn(0);
2170 }
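/*
   The loop above is a two-pointer merge of the (sorted) global column lists of row i of X and Y:
   every column appearing in either list contributes one nonzero, and a column appearing in both
   is counted only once.  For example, X row columns {0,3,7} and Y row columns {3,5} give
   nnz[i] = 4, i.e. {0,3,5,7}.
*/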
2171 
2172 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2175 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2176 {
2177   PetscErrorCode ierr;
2178   PetscInt       m = Y->rmap->N;
2179   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2180   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2181 
2182   PetscFunctionBegin;
2183   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatAXPY_MPIAIJ"
2189 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2190 {
2191   PetscErrorCode ierr;
2192   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2193   PetscBLASInt   bnz,one=1;
2194   Mat_SeqAIJ     *x,*y;
2195 
2196   PetscFunctionBegin;
2197   if (str == SAME_NONZERO_PATTERN) {
2198     PetscScalar alpha = a;
2199     x    = (Mat_SeqAIJ*)xx->A->data;
2200     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2201     y    = (Mat_SeqAIJ*)yy->A->data;
2202     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2203     x    = (Mat_SeqAIJ*)xx->B->data;
2204     y    = (Mat_SeqAIJ*)yy->B->data;
2205     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2206     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2207     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2208   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of that of Y */
2209     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2210   } else {
2211     Mat      B;
2212     PetscInt *nnz_d,*nnz_o;
2213     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2214     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2215     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2216     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2217     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2218     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2219     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2220     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2221     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2222     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2223     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2224     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2225     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2226     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2227   }
2228   PetscFunctionReturn(0);
2229 }
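/*
   A minimal usage sketch for Y <- a*X + Y as implemented above (assuming matrices X and Y with
   identical parallel layouts):

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* takes the preallocate-and-merge branch above */
*/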
2230 
2231 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatConjugate_MPIAIJ"
2235 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2236 {
2237 #if defined(PETSC_USE_COMPLEX)
2238   PetscErrorCode ierr;
2239   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2240 
2241   PetscFunctionBegin;
2242   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2243   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2244 #else
2245   PetscFunctionBegin;
2246 #endif
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatRealPart_MPIAIJ"
2252 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2253 {
2254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2255   PetscErrorCode ierr;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2259   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2265 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2272   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 #if defined(PETSC_HAVE_PBGL)
2277 
2278 #include <boost/parallel/mpi/bsp_process_group.hpp>
2279 #include <boost/graph/distributed/ilu_default_graph.hpp>
2280 #include <boost/graph/distributed/ilu_0_block.hpp>
2281 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2282 #include <boost/graph/distributed/petsc/interface.hpp>
2283 #include <boost/multi_array.hpp>
2284 #include <boost/parallel/distributed_property_map.hpp>
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2288 /*
2289   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2290 */
2291 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2292 {
2293   namespace petsc = boost::distributed::petsc;
2294 
2295   namespace graph_dist = boost::graph::distributed;
2296   using boost::graph::distributed::ilu_default::process_group_type;
2297   using boost::graph::ilu_permuted;
2298 
2299   PetscBool      row_identity, col_identity;
2300   PetscContainer c;
2301   PetscInt       m, n, M, N;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2306   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2307   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2308   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2309 
2310   process_group_type pg;
2311   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2312   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2313   lgraph_type& level_graph = *lgraph_p;
2314   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2315 
2316   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2317   ilu_permuted(level_graph);
2318 
2319   /* put together the new matrix */
2320   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2321   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2322   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2323   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2324   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2325   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2326   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2327   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2328 
2329   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2330   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2331   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2332   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2338 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2339 {
2340   PetscFunctionBegin;
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatSolve_MPIAIJ"
2346 /*
2347   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2348 */
2349 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2350 {
2351   namespace graph_dist = boost::graph::distributed;
2352 
2353   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2354   lgraph_type    *lgraph_p;
2355   PetscContainer c;
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2360   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2361   ierr = VecCopy(b, x);CHKERRQ(ierr);
2362 
2363   PetscScalar *array_x;
2364   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2365   PetscInt sx;
2366   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2367 
2368   PetscScalar *array_b;
2369   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2370   PetscInt sb;
2371   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2372 
2373   lgraph_type& level_graph = *lgraph_p;
2374   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2375 
2376   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2377   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2378   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2379 
2380   typedef boost::iterator_property_map<array_ref_type::iterator,
2381                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2382   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2383   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2384 
2385   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2386   PetscFunctionReturn(0);
2387 }
2388 #endif
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2392 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2393 {
2394   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2395   PetscErrorCode ierr;
2396   PetscInt       i,*idxb = 0;
2397   PetscScalar    *va,*vb;
2398   Vec            vtmp;
2399 
2400   PetscFunctionBegin;
2401   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2402   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2403   if (idx) {
2404     for (i=0; i<A->rmap->n; i++) {
2405       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2406     }
2407   }
2408 
2409   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2410   if (idx) {
2411     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2412   }
2413   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2414   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2415 
2416   for (i=0; i<A->rmap->n; i++) {
2417     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2418       va[i] = vb[i];
2419       if (idx) idx[i] = a->garray[idxb[i]];
2420     }
2421   }
2422 
2423   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2424   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2425   ierr = PetscFree(idxb);CHKERRQ(ierr);
2426   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
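/*
   A minimal usage sketch for the row-wise maximum absolute values computed above (assuming an
   assembled matrix A; the left vector from MatCreateVecs() conforms to the rows):

     Vec rmax;
     ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,NULL);CHKERRQ(ierr);  /* pass an index array instead of NULL to also obtain column locations */
     ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/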
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2432 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2435   PetscErrorCode ierr;
2436   PetscInt       i,*idxb = 0;
2437   PetscScalar    *va,*vb;
2438   Vec            vtmp;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2442   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2443   if (idx) {
2444     for (i=0; i<A->rmap->n; i++) {
2445       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2446     }
2447   }
2448 
2449   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2450   if (idx) {
2451     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2452   }
2453   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2454   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2455 
2456   for (i=0; i<A->rmap->n; i++) {
2457     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2458       va[i] = vb[i];
2459       if (idx) idx[i] = a->garray[idxb[i]];
2460     }
2461   }
2462 
2463   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2465   ierr = PetscFree(idxb);CHKERRQ(ierr);
2466   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2472 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2473 {
2474   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2475   PetscInt       n      = A->rmap->n;
2476   PetscInt       cstart = A->cmap->rstart;
2477   PetscInt       *cmap  = mat->garray;
2478   PetscInt       *diagIdx, *offdiagIdx;
2479   Vec            diagV, offdiagV;
2480   PetscScalar    *a, *diagA, *offdiagA;
2481   PetscInt       r;
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2488   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2489   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2490   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2491   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2492   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   for (r = 0; r < n; ++r) {
2494     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2495       a[r]   = diagA[r];
2496       idx[r] = cstart + diagIdx[r];
2497     } else {
2498       a[r]   = offdiagA[r];
2499       idx[r] = cmap[offdiagIdx[r]];
2500     }
2501   }
2502   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2503   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2505   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2506   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2507   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2513 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2514 {
2515   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2516   PetscInt       n      = A->rmap->n;
2517   PetscInt       cstart = A->cmap->rstart;
2518   PetscInt       *cmap  = mat->garray;
2519   PetscInt       *diagIdx, *offdiagIdx;
2520   Vec            diagV, offdiagV;
2521   PetscScalar    *a, *diagA, *offdiagA;
2522   PetscInt       r;
2523   PetscErrorCode ierr;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2527   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2528   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2529   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2530   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2531   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2532   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2533   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2534   for (r = 0; r < n; ++r) {
2535     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2536       a[r]   = diagA[r];
2537       idx[r] = cstart + diagIdx[r];
2538     } else {
2539       a[r]   = offdiagA[r];
2540       idx[r] = cmap[offdiagIdx[r]];
2541     }
2542   }
2543   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2544   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2545   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2546   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2547   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2548   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 #undef __FUNCT__
2553 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2554 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2555 {
2556   PetscErrorCode ierr;
2557   Mat            *dummy;
2558 
2559   PetscFunctionBegin;
2560   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2561   *newmat = *dummy;
2562   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2568 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2569 {
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 #undef __FUNCT__
2579 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2580 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2581 {
2582   PetscErrorCode ierr;
2583   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2584 
2585   PetscFunctionBegin;
2586   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2587   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2588   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2589   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 #undef __FUNCT__
2594 #define __FUNCT__ "MatShift_MPIAIJ"
2595 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2596 {
2597   PetscErrorCode ierr;
2598   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2599   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data,*bij = (Mat_SeqAIJ*)maij->B->data;
2600 
2601   PetscFunctionBegin;
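  /* if no entries have been preallocated yet, reserve one diagonal entry per row so MatShift_Basic() has room to insert the shift */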
2602   if (!aij->nz && !bij->nz) {
2603     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2604   }
2605   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 /* -------------------------------------------------------------------*/
2610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2611                                        MatGetRow_MPIAIJ,
2612                                        MatRestoreRow_MPIAIJ,
2613                                        MatMult_MPIAIJ,
2614                                 /* 4*/ MatMultAdd_MPIAIJ,
2615                                        MatMultTranspose_MPIAIJ,
2616                                        MatMultTransposeAdd_MPIAIJ,
2617 #if defined(PETSC_HAVE_PBGL)
2618                                        MatSolve_MPIAIJ,
2619 #else
2620                                        0,
2621 #endif
2622                                        0,
2623                                        0,
2624                                 /*10*/ 0,
2625                                        0,
2626                                        0,
2627                                        MatSOR_MPIAIJ,
2628                                        MatTranspose_MPIAIJ,
2629                                 /*15*/ MatGetInfo_MPIAIJ,
2630                                        MatEqual_MPIAIJ,
2631                                        MatGetDiagonal_MPIAIJ,
2632                                        MatDiagonalScale_MPIAIJ,
2633                                        MatNorm_MPIAIJ,
2634                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2635                                        MatAssemblyEnd_MPIAIJ,
2636                                        MatSetOption_MPIAIJ,
2637                                        MatZeroEntries_MPIAIJ,
2638                                 /*24*/ MatZeroRows_MPIAIJ,
2639                                        0,
2640 #if defined(PETSC_HAVE_PBGL)
2641                                        0,
2642 #else
2643                                        0,
2644 #endif
2645                                        0,
2646                                        0,
2647                                 /*29*/ MatSetUp_MPIAIJ,
2648 #if defined(PETSC_HAVE_PBGL)
2649                                        0,
2650 #else
2651                                        0,
2652 #endif
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*34*/ MatDuplicate_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*39*/ MatAXPY_MPIAIJ,
2662                                        MatGetSubMatrices_MPIAIJ,
2663                                        MatIncreaseOverlap_MPIAIJ,
2664                                        MatGetValues_MPIAIJ,
2665                                        MatCopy_MPIAIJ,
2666                                 /*44*/ MatGetRowMax_MPIAIJ,
2667                                        MatScale_MPIAIJ,
2668                                        MatShift_MPIAIJ,
2669                                        MatDiagonalSet_MPIAIJ,
2670                                        MatZeroRowsColumns_MPIAIJ,
2671                                 /*49*/ MatSetRandom_MPIAIJ,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2677                                        0,
2678                                        MatSetUnfactored_MPIAIJ,
2679                                        MatPermute_MPIAIJ,
2680                                        0,
2681                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2682                                        MatDestroy_MPIAIJ,
2683                                        MatView_MPIAIJ,
2684                                        0,
2685                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2686                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2687                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2692                                        MatGetRowMinAbs_MPIAIJ,
2693                                        0,
2694                                        MatSetColoring_MPIAIJ,
2695                                        0,
2696                                        MatSetValuesAdifor_MPIAIJ,
2697                                 /*75*/ MatFDColoringApply_AIJ,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        MatFindZeroDiagonals_MPIAIJ,
2702                                 /*80*/ 0,
2703                                        0,
2704                                        0,
2705                                 /*83*/ MatLoad_MPIAIJ,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2712                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2713                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2714                                        MatPtAP_MPIAIJ_MPIAIJ,
2715                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2716                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*99*/ 0,
2722                                        0,
2723                                        0,
2724                                        MatConjugate_MPIAIJ,
2725                                        0,
2726                                 /*104*/MatSetValuesRow_MPIAIJ,
2727                                        MatRealPart_MPIAIJ,
2728                                        MatImaginaryPart_MPIAIJ,
2729                                        0,
2730                                        0,
2731                                 /*109*/0,
2732                                        0,
2733                                        MatGetRowMin_MPIAIJ,
2734                                        0,
2735                                        0,
2736                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2737                                        0,
2738                                        MatGetGhosts_MPIAIJ,
2739                                        0,
2740                                        0,
2741                                 /*119*/0,
2742                                        0,
2743                                        0,
2744                                        0,
2745                                        MatGetMultiProcBlock_MPIAIJ,
2746                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2747                                        MatGetColumnNorms_MPIAIJ,
2748                                        MatInvertBlockDiagonal_MPIAIJ,
2749                                        0,
2750                                        MatGetSubMatricesMPI_MPIAIJ,
2751                                 /*129*/0,
2752                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2753                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2754                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2755                                        0,
2756                                 /*134*/0,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                        0,
2761                                 /*139*/0,
2762                                        0,
2763                                        0,
2764                                        MatFDColoringSetUp_MPIXAIJ,
2765                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2766                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2767 };
2768 
2769 /* ----------------------------------------------------------------------------------------*/
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2773 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2774 {
2775   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2776   PetscErrorCode ierr;
2777 
2778   PetscFunctionBegin;
2779   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2780   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 #undef __FUNCT__
2785 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2786 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2787 {
2788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2793   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2799 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2800 {
2801   Mat_MPIAIJ     *b;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2807   b = (Mat_MPIAIJ*)B->data;
2808 
2809   if (!B->preallocated) {
2810     /* Explicitly create 2 MATSEQAIJ matrices. */
2811     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2812     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2813     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2814     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2815     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2816     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2817     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2818     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2819     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2820     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2821   }
2822 
2823   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2824   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2825   B->preallocated = PETSC_TRUE;
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 #undef __FUNCT__
2830 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2831 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2832 {
2833   Mat            mat;
2834   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2835   PetscErrorCode ierr;
2836 
2837   PetscFunctionBegin;
2838   *newmat = 0;
2839   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2840   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2841   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2842   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2843   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2844   a       = (Mat_MPIAIJ*)mat->data;
2845 
2846   mat->factortype   = matin->factortype;
2847   mat->assembled    = PETSC_TRUE;
2848   mat->insertmode   = NOT_SET_VALUES;
2849   mat->preallocated = PETSC_TRUE;
2850 
2851   a->size         = oldmat->size;
2852   a->rank         = oldmat->rank;
2853   a->donotstash   = oldmat->donotstash;
2854   a->roworiented  = oldmat->roworiented;
2855   a->rowindices   = 0;
2856   a->rowvalues    = 0;
2857   a->getrowactive = PETSC_FALSE;
2858 
2859   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2860   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2861 
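  /* duplicate the global-to-local column lookup (colmap) and the list of global columns of the off-diagonal block (garray), when present */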
2862   if (oldmat->colmap) {
2863 #if defined(PETSC_USE_CTABLE)
2864     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2865 #else
2866     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2867     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2868     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2869 #endif
2870   } else a->colmap = 0;
2871   if (oldmat->garray) {
2872     PetscInt len;
2873     len  = oldmat->B->cmap->n;
2874     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2875     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2876     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2877   } else a->garray = 0;
2878 
2879   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2880   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2881   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2883   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2884   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2885   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2886   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2887   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2888   *newmat = mat;
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatLoad_MPIAIJ"
2896 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2897 {
2898   PetscScalar    *vals,*svals;
2899   MPI_Comm       comm;
2900   PetscErrorCode ierr;
2901   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2902   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2903   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2904   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2905   PetscInt       cend,cstart,n,*rowners;
2906   int            fd;
2907   PetscInt       bs = newMat->rmap->bs;
2908 
2909   PetscFunctionBegin;
2910   /* force binary viewer to load .info file if it has not yet done so */
2911   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2912   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2914   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2915   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2916   if (!rank) {
2917     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2918     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2919   }
2920 
2921   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2922   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2923   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2924   if (bs < 0) bs = 1;
2925 
2926   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2927   M    = header[1]; N = header[2];
2928 
2929   /* If global sizes are set, check if they are consistent with that given in the file */
2930   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2931   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2932 
2933   /* determine ownership of all (block) rows */
2934   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2935   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2936   else m = newMat->rmap->n; /* Set by user */
2937 
2938   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2939   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2940 
2941   /* First process needs enough room for process with most rows */
2942   if (!rank) {
2943     mmax = rowners[1];
2944     for (i=2; i<=size; i++) {
2945       mmax = PetscMax(mmax, rowners[i]);
2946     }
2947   } else mmax = -1;             /* unused, but compilers complain */
2948 
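  /* convert the per-process row counts gathered above into cumulative ownership offsets */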
2949   rowners[0] = 0;
2950   for (i=2; i<=size; i++) {
2951     rowners[i] += rowners[i-1];
2952   }
2953   rstart = rowners[rank];
2954   rend   = rowners[rank+1];
2955 
2956   /* distribute row lengths to all processors */
2957   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2958   if (!rank) {
2959     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2960     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2961     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2962     for (j=0; j<m; j++) {
2963       procsnz[0] += ourlens[j];
2964     }
2965     for (i=1; i<size; i++) {
2966       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2967       /* calculate the number of nonzeros on each processor */
2968       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2969         procsnz[i] += rowlengths[j];
2970       }
2971       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2972     }
2973     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2974   } else {
2975     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2976   }
2977 
2978   if (!rank) {
2979     /* determine max buffer needed and allocate it */
2980     maxnz = 0;
2981     for (i=0; i<size; i++) {
2982       maxnz = PetscMax(maxnz,procsnz[i]);
2983     }
2984     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2985 
2986     /* read in my part of the matrix column indices  */
2987     nz   = procsnz[0];
2988     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2989     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2990 
2991     /* read in everyone else's part and ship it off */
2992     for (i=1; i<size; i++) {
2993       nz   = procsnz[i];
2994       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2995       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2996     }
2997     ierr = PetscFree(cols);CHKERRQ(ierr);
2998   } else {
2999     /* determine buffer space needed for message */
3000     nz = 0;
3001     for (i=0; i<m; i++) {
3002       nz += ourlens[i];
3003     }
3004     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3005 
3006     /* receive message of column indices*/
3007     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3008   }
3009 
3010   /* determine column ownership if matrix is not square */
3011   if (N != M) {
3012     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3013     else n = newMat->cmap->n;
3014     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3015     cstart = cend - n;
3016   } else {
3017     cstart = rstart;
3018     cend   = rend;
3019     n      = cend - cstart;
3020   }
3021 
3022   /* loop over local rows, determining number of off diagonal entries */
3023   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3024   jj   = 0;
3025   for (i=0; i<m; i++) {
3026     for (j=0; j<ourlens[i]; j++) {
3027       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3028       jj++;
3029     }
3030   }
3031 
3032   for (i=0; i<m; i++) {
3033     ourlens[i] -= offlens[i];
3034   }
3035   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3036 
3037   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3038 
3039   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3040 
3041   for (i=0; i<m; i++) {
3042     ourlens[i] += offlens[i];
3043   }
3044 
3045   if (!rank) {
3046     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3047 
3048     /* read in my part of the matrix numerical values  */
3049     nz   = procsnz[0];
3050     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3051 
3052     /* insert into matrix */
3053     jj      = rstart;
3054     smycols = mycols;
3055     svals   = vals;
3056     for (i=0; i<m; i++) {
3057       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3058       smycols += ourlens[i];
3059       svals   += ourlens[i];
3060       jj++;
3061     }
3062 
3063     /* read in other processors and ship out */
3064     for (i=1; i<size; i++) {
3065       nz   = procsnz[i];
3066       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3067       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3068     }
3069     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3070   } else {
3071     /* receive numeric values */
3072     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3073 
3074     /* receive message of values*/
3075     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3076 
3077     /* insert into matrix */
3078     jj      = rstart;
3079     smycols = mycols;
3080     svals   = vals;
3081     for (i=0; i<m; i++) {
3082       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3083       smycols += ourlens[i];
3084       svals   += ourlens[i];
3085       jj++;
3086     }
3087   }
3088   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3089   ierr = PetscFree(vals);CHKERRQ(ierr);
3090   ierr = PetscFree(mycols);CHKERRQ(ierr);
3091   ierr = PetscFree(rowners);CHKERRQ(ierr);
3092   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 #undef __FUNCT__
3098 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3099 /* TODO: Not scalable because of ISAllGather(). */
3100 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3101 {
3102   PetscErrorCode ierr;
3103   IS             iscol_local;
3104   PetscInt       csize;
3105 
3106   PetscFunctionBegin;
3107   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3108   if (call == MAT_REUSE_MATRIX) {
3109     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3110     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3111   } else {
3112     PetscInt cbs;
3113     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3114     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3115     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3116   }
3117   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3118   if (call == MAT_INITIAL_MATRIX) {
3119     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3120     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3121   }
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3128 /*
3129     Not great since it makes two copies of the submatrix: first a SeqAIJ
3130   locally, and then the end result by concatenating the local matrices.
3131   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3132 
3133   Note: This requires a sequential iscol with all indices.
3134 */
3135 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3136 {
3137   PetscErrorCode ierr;
3138   PetscMPIInt    rank,size;
3139   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3140   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3141   PetscBool      allcolumns, colflag;
3142   Mat            M,Mreuse;
3143   MatScalar      *vwork,*aa;
3144   MPI_Comm       comm;
3145   Mat_SeqAIJ     *aij;
3146 
3147   PetscFunctionBegin;
3148   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3149   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3150   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3151 
3152   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3153   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3154   if (colflag && ncol == mat->cmap->N) {
3155     allcolumns = PETSC_TRUE;
3156   } else {
3157     allcolumns = PETSC_FALSE;
3158   }
3159   if (call ==  MAT_REUSE_MATRIX) {
3160     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3161     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3162     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3163   } else {
3164     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3165   }
3166 
3167   /*
3168       m - number of local rows
3169       n - number of columns (same on all processors)
3170       rstart - first row in new global matrix generated
3171   */
3172   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3173   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3174   if (call == MAT_INITIAL_MATRIX) {
3175     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3176     ii  = aij->i;
3177     jj  = aij->j;
3178 
3179     /*
3180         Determine the number of non-zeros in the diagonal and off-diagonal
3181         portions of the matrix in order to do correct preallocation
3182     */
3183 
3184     /* first get start and end of "diagonal" columns */
3185     if (csize == PETSC_DECIDE) {
3186       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3187       if (mglobal == n) { /* square matrix */
3188         nlocal = m;
3189       } else {
3190         nlocal = n/size + ((n % size) > rank);
3191       }
3192     } else {
3193       nlocal = csize;
3194     }
3195     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3196     rstart = rend - nlocal;
3197     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3198 
3199     /* next, compute all the lengths */
3200     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3201     olens = dlens + m;
3202     for (i=0; i<m; i++) {
3203       jend = ii[i+1] - ii[i];
3204       olen = 0;
3205       dlen = 0;
3206       for (j=0; j<jend; j++) {
3207         if (*jj < rstart || *jj >= rend) olen++;
3208         else dlen++;
3209         jj++;
3210       }
3211       olens[i] = olen;
3212       dlens[i] = dlen;
3213     }
3214     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3215     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3216     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3217     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3218     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3219     ierr = PetscFree(dlens);CHKERRQ(ierr);
3220   } else {
3221     PetscInt ml,nl;
3222 
3223     M    = *newmat;
3224     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3225     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3226     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3227     /*
3228          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3229        rather than the slower MatSetValues().
3230     */
3231     M->was_assembled = PETSC_TRUE;
3232     M->assembled     = PETSC_FALSE;
3233   }
3234   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3235   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3236   ii   = aij->i;
3237   jj   = aij->j;
3238   aa   = aij->a;
3239   for (i=0; i<m; i++) {
3240     row   = rstart + i;
3241     nz    = ii[i+1] - ii[i];
3242     cwork = jj;     jj += nz;
3243     vwork = aa;     aa += nz;
3244     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3245   }
3246 
3247   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3248   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3249   *newmat = M;
3250 
3251   /* save submatrix used in processor for next request */
3252   if (call ==  MAT_INITIAL_MATRIX) {
3253     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3254     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3255   }
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3261 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3262 {
3263   PetscInt       m,cstart, cend,j,nnz,i,d;
3264   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3265   const PetscInt *JJ;
3266   PetscScalar    *values;
3267   PetscErrorCode ierr;
3268 
3269   PetscFunctionBegin;
3270   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3271 
3272   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3273   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3274   m      = B->rmap->n;
3275   cstart = B->cmap->rstart;
3276   cend   = B->cmap->rend;
3277   rstart = B->rmap->rstart;
3278 
3279   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3280 
3281 #if defined(PETSC_USE_DEBUG)
3282   for (i=0; i<m; i++) {
3283     nnz = Ii[i+1]- Ii[i];
3284     JJ  = J + Ii[i];
3285     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3286     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3287     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3288   }
3289 #endif
3290 
3291   for (i=0; i<m; i++) {
3292     nnz     = Ii[i+1]- Ii[i];
3293     JJ      = J + Ii[i];
3294     nnz_max = PetscMax(nnz_max,nnz);
3295     d       = 0;
3296     for (j=0; j<nnz; j++) {
3297       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3298     }
3299     d_nnz[i] = d;
3300     o_nnz[i] = nnz - d;
3301   }
3302   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3303   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3304 
3305   if (v) values = (PetscScalar*)v;
3306   else {
3307     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3308   }
3309 
3310   for (i=0; i<m; i++) {
3311     ii   = i + rstart;
3312     nnz  = Ii[i+1]- Ii[i];
3313     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3314   }
3315   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3316   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317 
3318   if (!v) {
3319     ierr = PetscFree(values);CHKERRQ(ierr);
3320   }
3321   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3322   PetscFunctionReturn(0);
3323 }
3324 
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3327 /*@
3328    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3329    (the default parallel PETSc format).
3330 
3331    Collective on MPI_Comm
3332 
3333    Input Parameters:
3334 +  B - the matrix
3335 .  i - the indices into j for the start of each local row (starts with zero)
3336 .  j - the column indices for each local row (starts with zero)
3337 -  v - optional values in the matrix
3338 
3339    Level: developer
3340 
3341    Notes:
3342        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3343      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3344      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3345 
3346        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3347 
3348        The format used for the sparse matrix input is equivalent to a
3349     row-major ordering, i.e. for the following matrix the expected input data is
3350     as shown:
3351 
3352         1 0 0
3353         2 0 3     P0
3354        -------
3355         4 5 6     P1
3356 
3357      Process0 [P0]: rows_owned=[0,1]
3358         i =  {0,1,3}  [size = nrow+1  = 2+1]
3359         j =  {0,0,2}  [size = nz = 3]
3360         v =  {1,2,3}  [size = nz = 3]
3361 
3362      Process1 [P1]: rows_owned=[2]
3363         i =  {0,3}    [size = nrow+1  = 1+1]
3364         j =  {0,1,2}  [size = nz = 3]
3365         v =  {4,5,6}  [size = nz = 3]
3366 
3367 .keywords: matrix, aij, compressed row, sparse, parallel
3368 
3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3370           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3371 @*/
3372 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3373 {
3374   PetscErrorCode ierr;
3375 
3376   PetscFunctionBegin;
3377   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3378   PetscFunctionReturn(0);
3379 }
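
/*
   A minimal usage sketch of MatMPIAIJSetPreallocationCSR(), assuming each process owns two
   rows holding its own 2x2 diagonal block of a global block-diagonal matrix. The local row
   offsets ii always start at 0, while the column indices jj are global; the sizes and values
   below are illustrative only.

.vb
     Mat         B;
     PetscMPIInt rank;
     PetscInt    ii[3],jj[4];
     PetscScalar vv[4] = {4.0,-1.0,-1.0,4.0};

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     ii[0] = 0;      ii[1] = 2;        ii[2] = 4;
     jj[0] = 2*rank; jj[1] = 2*rank+1;
     jj[2] = 2*rank; jj[3] = 2*rank+1;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,ii,jj,vv);
     MatDestroy(&B);
.ve
*/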
3380 
3381 #undef __FUNCT__
3382 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3383 /*@C
3384    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3385    (the default parallel PETSc format).  For good matrix assembly performance
3386    the user should preallocate the matrix storage by setting the parameters
3387    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3388    performance can be increased by more than a factor of 50.
3389 
3390    Collective on MPI_Comm
3391 
3392    Input Parameters:
3393 +  B - the matrix
3394 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3395            (same value is used for all local rows)
3396 .  d_nnz - array containing the number of nonzeros in the various rows of the
3397            DIAGONAL portion of the local submatrix (possibly different for each row)
3398            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3399            The size of this array is equal to the number of local rows, i.e 'm'.
3400            For matrices that will be factored, you must leave room for (and set)
3401            the diagonal entry even if it is zero.
3402 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3403            submatrix (same value is used for all local rows).
3404 -  o_nnz - array containing the number of nonzeros in the various rows of the
3405            OFF-DIAGONAL portion of the local submatrix (possibly different for
3406            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3407            structure. The size of this array is equal to the number
3408            of local rows, i.e 'm'.
3409 
3410    If the *_nnz parameter is given then the *_nz parameter is ignored
3411 
3412    The AIJ format (also called the Yale sparse matrix format or
3413    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3414    storage.  The stored row and column indices begin with zero.
3415    See Users-Manual: ch_mat for details.
3416 
3417    The parallel matrix is partitioned such that the first m0 rows belong to
3418    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3419    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3420 
3421    The DIAGONAL portion of the local submatrix of a processor can be defined
3422    as the submatrix obtained by extracting the part corresponding to
3423    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3424    first row that belongs to the processor, r2 is the last row belonging to
3425    this processor, and c1-c2 is the range of indices of the local part of a
3426    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3427    common case of a square matrix, the row and column ranges are the same and
3428    the DIAGONAL part is also square. The remaining portion of the local
3429    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3430 
3431    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3432 
3433    You can call MatGetInfo() to get information on how effective the preallocation was;
3434    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3435    You can also run with the option -info and look for messages with the string
3436    malloc in them to see if additional memory allocation was needed.
3437 
3438    Example usage:
3439 
3440    Consider the following 8x8 matrix with 34 non-zero values, that is
3441    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3442    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3443    as follows:
3444 
3445 .vb
3446             1  2  0  |  0  3  0  |  0  4
3447     Proc0   0  5  6  |  7  0  0  |  8  0
3448             9  0 10  | 11  0  0  | 12  0
3449     -------------------------------------
3450            13  0 14  | 15 16 17  |  0  0
3451     Proc1   0 18  0  | 19 20 21  |  0  0
3452             0  0  0  | 22 23  0  | 24  0
3453     -------------------------------------
3454     Proc2  25 26 27  |  0  0 28  | 29  0
3455            30  0  0  | 31 32 33  |  0 34
3456 .ve
3457 
3458    This can be represented as a collection of submatrices as:
3459 
3460 .vb
3461       A B C
3462       D E F
3463       G H I
3464 .ve
3465 
3466    Where the submatrices A,B,C are owned by proc0, D,E,F are
3467    owned by proc1, G,H,I are owned by proc2.
3468 
3469    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3470    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3471    The 'M','N' parameters are 8,8, and have the same values on all procs.
3472 
3473    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3474    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3475    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3476    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3477    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3478    matrix, and [DF] as another SeqAIJ matrix.
3479 
3480    When d_nz, o_nz parameters are specified, d_nz storage elements are
3481    allocated for every row of the local diagonal submatrix, and o_nz
3482    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3483    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3484    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3485    In this case, the values of d_nz,o_nz are:
3486 .vb
3487      proc0 : dnz = 2, o_nz = 2
3488      proc1 : dnz = 3, o_nz = 2
3489      proc2 : dnz = 1, o_nz = 4
3490 .ve
3491    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3492    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3493    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3494    34 values.
3495 
3496    When d_nnz, o_nnz parameters are specified, the storage is specified
3497    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3498    In the above case the values for d_nnz,o_nnz are:
3499 .vb
3500      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3501      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3502      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3503 .ve
3504    Here the space allocated is sum of all the above values i.e 34, and
3505    hence pre-allocation is perfect.
3506 
3507    Level: intermediate
3508 
3509 .keywords: matrix, aij, compressed row, sparse, parallel
3510 
3511 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3512           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3513 @*/
3514 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3515 {
3516   PetscErrorCode ierr;
3517 
3518   PetscFunctionBegin;
3519   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3520   PetscValidType(B,1);
3521   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3522   PetscFunctionReturn(0);
3523 }
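
/*
   A brief calling sketch for proc0 of the 8x8 example in the notes above; the per-row counts
   simply restate the d_nnz/o_nnz values listed there, and proc1 and proc2 would pass their own
   local sizes and counts analogously.

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
*/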
3524 
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3527 /*@
3528      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3529          CSR format the local rows.
3530 
3531    Collective on MPI_Comm
3532 
3533    Input Parameters:
3534 +  comm - MPI communicator
3535 .  m - number of local rows (Cannot be PETSC_DECIDE)
3536 .  n - This value should be the same as the local size used in creating the
3537        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3538        calculated if N is given) For square matrices n is almost always m.
3539 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3540 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3541 .   i - row indices
3542 .   j - column indices
3543 -   a - matrix values
3544 
3545    Output Parameter:
3546 .   mat - the matrix
3547 
3548    Level: intermediate
3549 
3550    Notes:
3551        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3552      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3553      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3554 
3555        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3556 
3557        The format used for the sparse matrix input is equivalent to a
3558     row-major ordering, i.e. for the following matrix the expected input data is
3559     as shown:
3560 
3561         1 0 0
3562         2 0 3     P0
3563        -------
3564         4 5 6     P1
3565 
3566      Process0 [P0]: rows_owned=[0,1]
3567         i =  {0,1,3}  [size = nrow+1  = 2+1]
3568         j =  {0,0,2}  [size = nz = 3]
3569         v =  {1,2,3}  [size = nz = 3]
3570 
3571      Process1 [P1]: rows_owned=[2]
3572         i =  {0,3}    [size = nrow+1  = 1+1]
3573         j =  {0,1,2}  [size = nz = 3]
3574         v =  {4,5,6}  [size = nz = 3]
3575 
3576 .keywords: matrix, aij, compressed row, sparse, parallel
3577 
3578 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3579           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3580 @*/
3581 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3582 {
3583   PetscErrorCode ierr;
3584 
3585   PetscFunctionBegin;
3586   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3587   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3588   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3589   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3590   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3591   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3592   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3593   PetscFunctionReturn(0);
3594 }
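
/*
   A sketch of the two-process 3x3 example from the notes above, written as process 0 would
   call it (process 1 passes m = 1 and its own i, j, and a arrays analogously); the array
   contents restate the example data shown there.

.vb
     Mat         A;
     PetscInt    i[3] = {0,1,3},j[3] = {0,0,2};
     PetscScalar a[3] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
*/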
3595 
3596 #undef __FUNCT__
3597 #define __FUNCT__ "MatCreateAIJ"
3598 /*@C
3599    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3600    (the default parallel PETSc format).  For good matrix assembly performance
3601    the user should preallocate the matrix storage by setting the parameters
3602    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3603    performance can be increased by more than a factor of 50.
3604 
3605    Collective on MPI_Comm
3606 
3607    Input Parameters:
3608 +  comm - MPI communicator
3609 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3610            This value should be the same as the local size used in creating the
3611            y vector for the matrix-vector product y = Ax.
3612 .  n - This value should be the same as the local size used in creating the
3613        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3614        calculated if N is given) For square matrices n is almost always m.
3615 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3616 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3617 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3618            (same value is used for all local rows)
3619 .  d_nnz - array containing the number of nonzeros in the various rows of the
3620            DIAGONAL portion of the local submatrix (possibly different for each row)
3621            or NULL, if d_nz is used to specify the nonzero structure.
3622            The size of this array is equal to the number of local rows, i.e 'm'.
3623 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3624            submatrix (same value is used for all local rows).
3625 -  o_nnz - array containing the number of nonzeros in the various rows of the
3626            OFF-DIAGONAL portion of the local submatrix (possibly different for
3627            each row) or NULL, if o_nz is used to specify the nonzero
3628            structure. The size of this array is equal to the number
3629            of local rows, i.e 'm'.
3630 
3631    Output Parameter:
3632 .  A - the matrix
3633 
3634    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3635    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3636    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3637 
3638    Notes:
3639    If the *_nnz parameter is given then the *_nz parameter is ignored
3640 
3641    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3642    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3643    storage requirements for this matrix.
3644 
3645    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3646    processor than it must be used on all processors that share the object for
3647    that argument.
3648 
3649    The user MUST specify either the local or global matrix dimensions
3650    (possibly both).
3651 
3652    The parallel matrix is partitioned across processors such that the
3653    first m0 rows belong to process 0, the next m1 rows belong to
3654    process 1, the next m2 rows belong to process 2, etc., where
3655    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3656    values corresponding to an [m x N] submatrix.
3657 
3658    The columns are logically partitioned with the n0 columns belonging
3659    to the 0th partition, the next n1 columns belonging to the next
3660    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3661 
3662    The DIAGONAL portion of the local submatrix on any given processor
3663    is the submatrix of the m rows and n columns owned by that
3664    processor, i.e. the diagonal matrix on
3665    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3666    etc. The remaining portion of the local submatrix [m x (N-n)]
3667    constitutes the OFF-DIAGONAL portion. The example below
3668    illustrates this concept.
3669 
3670    For a square global matrix we define each processor's diagonal portion
3671    to be its local rows and the corresponding columns (a square submatrix);
3672    each processor's off-diagonal portion encompasses the remainder of the
3673    local matrix (a rectangular submatrix).
3674 
3675    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3676 
3677    When calling this routine with a single process communicator, a matrix of
3678    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3679    type of communicator, use the construction mechanism:
3680      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3681 
3682    By default, this format uses inodes (identical nodes) when possible.
3683    We search for consecutive rows with the same nonzero structure, thereby
3684    reusing matrix information to achieve increased efficiency.
3685 
3686    Options Database Keys:
3687 +  -mat_no_inode  - Do not use inodes
3688 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3689 -  -mat_aij_oneindex - Internally use indexing starting at 1
3690         rather than 0.  Note that when calling MatSetValues(),
3691         the user still MUST index entries starting at 0!
3692 
3693 
3694    Example usage:
3695 
3696    Consider the following 8x8 matrix with 34 non-zero values, that is
3697    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3698    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3699    as follows:
3700 
3701 .vb
3702             1  2  0  |  0  3  0  |  0  4
3703     Proc0   0  5  6  |  7  0  0  |  8  0
3704             9  0 10  | 11  0  0  | 12  0
3705     -------------------------------------
3706            13  0 14  | 15 16 17  |  0  0
3707     Proc1   0 18  0  | 19 20 21  |  0  0
3708             0  0  0  | 22 23  0  | 24  0
3709     -------------------------------------
3710     Proc2  25 26 27  |  0  0 28  | 29  0
3711            30  0  0  | 31 32 33  |  0 34
3712 .ve
3713 
3714    This can be represented as a collection of submatrices as:
3715 
3716 .vb
3717       A B C
3718       D E F
3719       G H I
3720 .ve
3721 
3722    Where the submatrices A,B,C are owned by proc0, D,E,F are
3723    owned by proc1, G,H,I are owned by proc2.
3724 
3725    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3726    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3727    The 'M','N' parameters are 8,8, and have the same values on all procs.
3728 
3729    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3730    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3731    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3732    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3733    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3734    matrix, and [DF] as another SeqAIJ matrix.
3735 
3736    When d_nz, o_nz parameters are specified, d_nz storage elements are
3737    allocated for every row of the local diagonal submatrix, and o_nz
3738    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3739    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3740    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3741    In this case, the values of d_nz,o_nz are:
3742 .vb
3743      proc0 : dnz = 2, o_nz = 2
3744      proc1 : dnz = 3, o_nz = 2
3745      proc2 : dnz = 1, o_nz = 4
3746 .ve
3747    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3748    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3749    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3750    34 values.
3751 
3752    When d_nnz, o_nnz parameters are specified, the storage is specified
3753    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3754    In the above case the values for d_nnz,o_nnz are:
3755 .vb
3756      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3757      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3758      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3759 .ve
3760    Here the space allocated is sum of all the above values i.e 34, and
3761    hence pre-allocation is perfect.
3762 
3763    Level: intermediate
3764 
3765 .keywords: matrix, aij, compressed row, sparse, parallel
3766 
3767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3768           MPIAIJ, MatCreateMPIAIJWithArrays()
3769 @*/
3770 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3771 {
3772   PetscErrorCode ierr;
3773   PetscMPIInt    size;
3774 
3775   PetscFunctionBegin;
3776   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3777   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3778   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3779   if (size > 1) {
3780     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3781     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3782   } else {
3783     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3784     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3785   }
3786   PetscFunctionReturn(0);
3787 }
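
/*
   A minimal creation sketch using the single d_nz/o_nz form of preallocation, assuming m local
   rows of a square matrix with at most 5 nonzeros per row in the diagonal block and 2 in the
   off-diagonal block; switch to the d_nnz/o_nnz arrays when row lengths vary widely.

.vb
     Mat A;

     MatCreateAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);
     MatSetValues(A,...,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
*/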
3788 
3789 #undef __FUNCT__
3790 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3791 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3792 {
3793   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3794 
3795   PetscFunctionBegin;
3796   if (Ad)     *Ad     = a->A;
3797   if (Ao)     *Ao     = a->B;
3798   if (colmap) *colmap = a->garray;
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3804 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3805 {
3806   PetscErrorCode ierr;
3807   PetscInt       i;
3808   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3809 
3810   PetscFunctionBegin;
3811   if (coloring->ctype == IS_COLORING_GLOBAL) {
3812     ISColoringValue *allcolors,*colors;
3813     ISColoring      ocoloring;
3814 
3815     /* set coloring for diagonal portion */
3816     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3817 
3818     /* set coloring for off-diagonal portion */
3819     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3820     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3821     for (i=0; i<a->B->cmap->n; i++) {
3822       colors[i] = allcolors[a->garray[i]];
3823     }
3824     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3825     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3826     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3827     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3828   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3829     ISColoringValue *colors;
3830     PetscInt        *larray;
3831     ISColoring      ocoloring;
3832 
3833     /* set coloring for diagonal portion */
3834     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3835     for (i=0; i<a->A->cmap->n; i++) {
3836       larray[i] = i + A->cmap->rstart;
3837     }
3838     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3839     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3840     for (i=0; i<a->A->cmap->n; i++) {
3841       colors[i] = coloring->colors[larray[i]];
3842     }
3843     ierr = PetscFree(larray);CHKERRQ(ierr);
3844     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3845     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3846     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3847 
3848     /* set coloring for off-diagonal portion */
3849     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3850     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3851     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3852     for (i=0; i<a->B->cmap->n; i++) {
3853       colors[i] = coloring->colors[larray[i]];
3854     }
3855     ierr = PetscFree(larray);CHKERRQ(ierr);
3856     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3857     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3858     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3859   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3860   PetscFunctionReturn(0);
3861 }
3862 
3863 #undef __FUNCT__
3864 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3865 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3866 {
3867   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3868   PetscErrorCode ierr;
3869 
3870   PetscFunctionBegin;
3871   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3872   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 #undef __FUNCT__
3877 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
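/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Builds a parallel MPIAIJ matrix whose rows are the rows
   of the per-process sequential matrices inmat stacked in rank order; n is the number of local
   columns (or PETSC_DECIDE). With MAT_REUSE_MATRIX only the numeric phase below is repeated.
*/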
3878 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3879 {
3880   PetscErrorCode ierr;
3881   PetscInt       m,N,i,rstart,nnz,Ii;
3882   PetscInt       *indx;
3883   PetscScalar    *values;
3884 
3885   PetscFunctionBegin;
3886   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3887   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3888     PetscInt       *dnz,*onz,sum,bs,cbs;
3889 
3890     if (n == PETSC_DECIDE) {
3891       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3892     }
3893     /* Check sum(n) = N */
3894     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3895     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3896 
3897     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3898     rstart -= m;
3899 
3900     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3901     for (i=0; i<m; i++) {
3902       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3903       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3904       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3905     }
3906 
3907     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3908     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3909     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3910     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3911     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3912     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3913     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3914   }
3915 
3916   /* numeric phase */
3917   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3918   for (i=0; i<m; i++) {
3919     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3920     Ii   = i + rstart;
3921     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3922     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3923   }
3924   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3925   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3926   PetscFunctionReturn(0);
3927 }
3928 
3929 #undef __FUNCT__
3930 #define __FUNCT__ "MatFileSplit"
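/*
   MatFileSplit - Copies each process's local rows of A into an m-by-N SeqAIJ matrix and appends
   that matrix to the binary file "<outfile>.<rank>", one file per process.
*/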
3931 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3932 {
3933   PetscErrorCode    ierr;
3934   PetscMPIInt       rank;
3935   PetscInt          m,N,i,rstart,nnz;
3936   size_t            len;
3937   const PetscInt    *indx;
3938   PetscViewer       out;
3939   char              *name;
3940   Mat               B;
3941   const PetscScalar *values;
3942 
3943   PetscFunctionBegin;
3944   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3945   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3946   /* Should this be the type of the diagonal block of A? */
3947   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3948   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3949   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3950   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3951   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3952   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3953   for (i=0; i<m; i++) {
3954     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3955     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3956     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3957   }
3958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3960 
3961   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3962   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3963   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3964   sprintf(name,"%s.%d",outfile,rank);
3965   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3966   ierr = PetscFree(name);CHKERRQ(ierr);
3967   ierr = MatView(B,out);CHKERRQ(ierr);
3968   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3969   ierr = MatDestroy(&B);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3974 #undef __FUNCT__
3975 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3976 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3977 {
3978   PetscErrorCode      ierr;
3979   Mat_Merge_SeqsToMPI *merge;
3980   PetscContainer      container;
3981 
3982   PetscFunctionBegin;
3983   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3984   if (container) {
3985     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3998     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3999     ierr = PetscFree(merge);CHKERRQ(ierr);
4000     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4001   }
4002   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4003   PetscFunctionReturn(0);
4004 }
4005 
4006 #include <../src/mat/utils/freespace.h>
4007 #include <petscbt.h>
4008 
4009 #undef __FUNCT__
4010 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4011 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4012 {
4013   PetscErrorCode      ierr;
4014   MPI_Comm            comm;
4015   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4016   PetscMPIInt         size,rank,taga,*len_s;
4017   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4018   PetscInt            proc,m;
4019   PetscInt            **buf_ri,**buf_rj;
4020   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4021   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4022   MPI_Request         *s_waits,*r_waits;
4023   MPI_Status          *status;
4024   MatScalar           *aa=a->a;
4025   MatScalar           **abuf_r,*ba_i;
4026   Mat_Merge_SeqsToMPI *merge;
4027   PetscContainer      container;
4028 
4029   PetscFunctionBegin;
4030   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4031   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4032 
4033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4034   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4035 
4036   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4037   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4038 
4039   bi     = merge->bi;
4040   bj     = merge->bj;
4041   buf_ri = merge->buf_ri;
4042   buf_rj = merge->buf_rj;
4043 
4044   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4045   owners = merge->rowmap->range;
4046   len_s  = merge->len_s;
4047 
4048   /* send and recv matrix values */
4049   /*-----------------------------*/
4050   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4051   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4052 
4053   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4054   for (proc=0,k=0; proc<size; proc++) {
4055     if (!len_s[proc]) continue;
4056     i    = owners[proc];
4057     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4058     k++;
4059   }
4060 
4061   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4062   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4063   ierr = PetscFree(status);CHKERRQ(ierr);
4064 
4065   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4066   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4067 
4068   /* insert mat values of mpimat */
4069   /*----------------------------*/
4070   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4071   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4072 
4073   for (k=0; k<merge->nrecv; k++) {
4074     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4075     nrows       = *(buf_ri_k[k]);
4076     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4077     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4078   }
4079 
4080   /* set values of ba */
4081   m = merge->rowmap->n;
4082   for (i=0; i<m; i++) {
4083     arow = owners[rank] + i;
4084     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4085     bnzi = bi[i+1] - bi[i];
4086     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4087 
4088     /* add local non-zero vals of this proc's seqmat into ba */
4089     anzi   = ai[arow+1] - ai[arow];
4090     aj     = a->j + ai[arow];
4091     aa     = a->a + ai[arow];
4092     nextaj = 0;
4093     for (j=0; nextaj<anzi; j++) {
4094       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4095         ba_i[j] += aa[nextaj++];
4096       }
4097     }
4098 
4099     /* add received vals into ba */
4100     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4101       /* i-th row */
4102       if (i == *nextrow[k]) {
4103         anzi   = *(nextai[k]+1) - *nextai[k];
4104         aj     = buf_rj[k] + *(nextai[k]);
4105         aa     = abuf_r[k] + *(nextai[k]);
4106         nextaj = 0;
4107         for (j=0; nextaj<anzi; j++) {
4108           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4109             ba_i[j] += aa[nextaj++];
4110           }
4111         }
4112         nextrow[k]++; nextai[k]++;
4113       }
4114     }
4115     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4116   }
4117   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4118   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4119 
4120   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4121   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4122   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4123   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4124   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4125   PetscFunctionReturn(0);
4126 }
4127 
4128 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4129 
4130 #undef __FUNCT__
4131 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4132 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4133 {
4134   PetscErrorCode      ierr;
4135   Mat                 B_mpi;
4136   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4137   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4138   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4139   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4140   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4141   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4142   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4143   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4144   MPI_Status          *status;
4145   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4146   PetscBT             lnkbt;
4147   Mat_Merge_SeqsToMPI *merge;
4148   PetscContainer      container;
4149 
4150   PetscFunctionBegin;
4151   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4152 
4153   /* make sure it is a PETSc comm */
4154   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4157 
4158   ierr = PetscNew(&merge);CHKERRQ(ierr);
4159   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4160 
4161   /* determine row ownership */
4162   /*---------------------------------------------------------*/
4163   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4164   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4165   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4166   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4167   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4168   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4169   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4170 
4171   m      = merge->rowmap->n;
4172   owners = merge->rowmap->range;
4173 
4174   /* determine the number of messages to send, their lengths */
4175   /*---------------------------------------------------------*/
4176   len_s = merge->len_s;
4177 
4178   len          = 0; /* length of buf_si[] */
4179   merge->nsend = 0;
4180   for (proc=0; proc<size; proc++) {
4181     len_si[proc] = 0;
4182     if (proc == rank) {
4183       len_s[proc] = 0;
4184     } else {
4185       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4186       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4187       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j entries) to be sent to [proc] */
4188     if (len_s[proc]) {
4189       merge->nsend++;
4190       nrows = 0;
4191       for (i=owners[proc]; i<owners[proc+1]; i++) {
4192         if (ai[i+1] > ai[i]) nrows++;
4193       }
4194       len_si[proc] = 2*(nrows+1);
4195       len         += len_si[proc];
4196     }
4197   }
4198 
4199   /* determine the number and length of messages to receive for ij-structure */
4200   /*-------------------------------------------------------------------------*/
4201   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4202   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4203 
4204   /* post the Irecv of j-structure */
4205   /*-------------------------------*/
4206   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4207   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4208 
4209   /* post the Isend of j-structure */
4210   /*--------------------------------*/
4211   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4212 
4213   for (proc=0, k=0; proc<size; proc++) {
4214     if (!len_s[proc]) continue;
4215     i    = owners[proc];
4216     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4217     k++;
4218   }
4219 
4220   /* receives and sends of j-structure are complete */
4221   /*------------------------------------------------*/
4222   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4223   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4224 
4225   /* send and recv i-structure */
4226   /*---------------------------*/
4227   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4228   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4229 
4230   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4231   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4232   for (proc=0,k=0; proc<size; proc++) {
4233     if (!len_s[proc]) continue;
4234     /* form outgoing message for i-structure:
4235          buf_si[0]:                 nrows to be sent
4236                [1:nrows]:           row index (local to the receiving process)
4237                [nrows+1:2*nrows+1]: i-structure index
4238     */
4239     /*-------------------------------------------*/
4240     nrows       = len_si[proc]/2 - 1;
4241     buf_si_i    = buf_si + nrows+1;
4242     buf_si[0]   = nrows;
4243     buf_si_i[0] = 0;
4244     nrows       = 0;
4245     for (i=owners[proc]; i<owners[proc+1]; i++) {
4246       anzi = ai[i+1] - ai[i];
4247       if (anzi) {
4248         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4249         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4250         nrows++;
4251       }
4252     }
4253     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4254     k++;
4255     buf_si += len_si[proc];
4256   }
4257 
4258   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4259   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4260 
4261   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4262   for (i=0; i<merge->nrecv; i++) {
4263     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4264   }
4265 
4266   ierr = PetscFree(len_si);CHKERRQ(ierr);
4267   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4268   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4269   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4270   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4271   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4272   ierr = PetscFree(status);CHKERRQ(ierr);
4273 
4274   /* compute a local seq matrix in each processor */
4275   /*----------------------------------------------*/
4276   /* allocate bi array and free space for accumulating nonzero column info */
4277   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4278   bi[0] = 0;
4279 
4280   /* create and initialize a linked list */
4281   nlnk = N+1;
4282   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4283 
4284   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4285   len  = ai[owners[rank+1]] - ai[owners[rank]];
4286   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4287 
4288   current_space = free_space;
4289 
4290   /* determine symbolic info for each local row */
4291   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4292 
4293   for (k=0; k<merge->nrecv; k++) {
4294     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4295     nrows       = *buf_ri_k[k];
4296     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4297     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4298   }
4299 
4300   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4301   len  = 0;
4302   for (i=0; i<m; i++) {
4303     bnzi = 0;
4304     /* add local non-zero cols of this proc's seqmat into lnk */
4305     arow  = owners[rank] + i;
4306     anzi  = ai[arow+1] - ai[arow];
4307     aj    = a->j + ai[arow];
4308     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4309     bnzi += nlnk;
4310     /* add received col data into lnk */
4311     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4312       if (i == *nextrow[k]) { /* i-th row */
4313         anzi  = *(nextai[k]+1) - *nextai[k];
4314         aj    = buf_rj[k] + *nextai[k];
4315         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4316         bnzi += nlnk;
4317         nextrow[k]++; nextai[k]++;
4318       }
4319     }
4320     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4321 
4322     /* if free space is not available, make more free space */
4323     if (current_space->local_remaining<bnzi) {
4324       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4325       nspacedouble++;
4326     }
4327     /* copy data into free space, then initialize lnk */
4328     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4329     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4330 
4331     current_space->array           += bnzi;
4332     current_space->local_used      += bnzi;
4333     current_space->local_remaining -= bnzi;
4334 
4335     bi[i+1] = bi[i] + bnzi;
4336   }
4337 
4338   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4339 
4340   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4341   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4342   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4343 
4344   /* create symbolic parallel matrix B_mpi */
4345   /*---------------------------------------*/
4346   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4347   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4348   if (n==PETSC_DECIDE) {
4349     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4350   } else {
4351     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4352   }
4353   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4354   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4355   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4356   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4357   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4358 
4359   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4360   B_mpi->assembled    = PETSC_FALSE;
4361   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4362   merge->bi           = bi;
4363   merge->bj           = bj;
4364   merge->buf_ri       = buf_ri;
4365   merge->buf_rj       = buf_rj;
4366   merge->coi          = NULL;
4367   merge->coj          = NULL;
4368   merge->owners_co    = NULL;
4369 
4370   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4371 
4372   /* attach the supporting struct to B_mpi for reuse */
4373   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4374   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4375   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4376   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4377   *mpimat = B_mpi;
4378 
4379   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4380   PetscFunctionReturn(0);
4381 }
4382 
4383 #undef __FUNCT__
4384 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4385 /*@C
4386       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4387                  matrices from each processor
4388 
4389     Collective on MPI_Comm
4390 
4391    Input Parameters:
4392 +    comm - the communicator the parallel matrix will live on
4393 .    seqmat - the input sequential matrix (one per process)
4394 .    m - number of local rows (or PETSC_DECIDE)
4395 .    n - number of local columns (or PETSC_DECIDE)
4396 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4397 
4398    Output Parameter:
4399 .    mpimat - the parallel matrix generated
4400 
4401     Level: advanced
4402 
4403    Notes:
4404      The dimensions of the sequential matrix in each processor MUST be the same.
4405      The dimensions of the sequential matrix on each processor MUST be the same.
4406      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
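
   A minimal usage sketch (each process assembles its own seqmat of the same global size; the local
   sizes are left to PETSc here):
.vb
     Mat seqmat,mpimat;
     /* ... assemble seqmat as a SeqAIJ matrix on each process ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve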
4407 @*/
4408 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4409 {
4410   PetscErrorCode ierr;
4411   PetscMPIInt    size;
4412 
4413   PetscFunctionBegin;
4414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415   if (size == 1) {
4416     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4417     if (scall == MAT_INITIAL_MATRIX) {
4418       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4419     } else {
4420       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4421     }
4422     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4423     PetscFunctionReturn(0);
4424   }
4425   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4426   if (scall == MAT_INITIAL_MATRIX) {
4427     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4428   }
4429   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4430   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4431   PetscFunctionReturn(0);
4432 }
4433 
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4436 /*@
4437      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4438           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4439           with MatGetSize()
4440 
4441     Not Collective
4442 
4443    Input Parameters:
4444 +    A - the matrix
4445 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4446 
4447    Output Parameter:
4448 .    A_loc - the local sequential matrix generated
4449 
4450     Level: developer
4451 
4452 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4453 
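   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... work with the local rows of A through A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve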
4454 @*/
4455 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4456 {
4457   PetscErrorCode ierr;
4458   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4459   Mat_SeqAIJ     *mat,*a,*b;
4460   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4461   MatScalar      *aa,*ba,*cam;
4462   PetscScalar    *ca;
4463   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4464   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4465   PetscBool      match;
4466   MPI_Comm       comm;
4467   PetscMPIInt    size;
4468 
4469   PetscFunctionBegin;
4470   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4471   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4472   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4473   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4474   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4475 
4476   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4477   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4478   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4479   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4480   aa = a->a; ba = b->a;
4481   if (scall == MAT_INITIAL_MATRIX) {
4482     if (size == 1) {
4483       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4484       PetscFunctionReturn(0);
4485     }
4486 
4487     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4488     ci[0] = 0;
4489     for (i=0; i<am; i++) {
4490       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4491     }
4492     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4493     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4494     k    = 0;
4495     for (i=0; i<am; i++) {
4496       ncols_o = bi[i+1] - bi[i];
4497       ncols_d = ai[i+1] - ai[i];
4498       /* off-diagonal portion of A */
4499       for (jo=0; jo<ncols_o; jo++) {
4500         col = cmap[*bj];
4501         if (col >= cstart) break;
4502         cj[k]   = col; bj++;
4503         ca[k++] = *ba++;
4504       }
4505       /* diagonal portion of A */
4506       for (j=0; j<ncols_d; j++) {
4507         cj[k]   = cstart + *aj++;
4508         ca[k++] = *aa++;
4509       }
4510       /* off-diagonal portion of A */
4511       for (j=jo; j<ncols_o; j++) {
4512         cj[k]   = cmap[*bj++];
4513         ca[k++] = *ba++;
4514       }
4515     }
4516     /* put together the new matrix */
4517     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4518     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4519     /* Since these are PETSc arrays, change flags to free them as necessary. */
4520     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4521     mat->free_a  = PETSC_TRUE;
4522     mat->free_ij = PETSC_TRUE;
4523     mat->nonew   = 0;
4524   } else if (scall == MAT_REUSE_MATRIX) {
4525     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4526     ci = mat->i; cj = mat->j; cam = mat->a;
4527     for (i=0; i<am; i++) {
4528       /* off-diagonal portion of A */
4529       ncols_o = bi[i+1] - bi[i];
4530       for (jo=0; jo<ncols_o; jo++) {
4531         col = cmap[*bj];
4532         if (col >= cstart) break;
4533         *cam++ = *ba++; bj++;
4534       }
4535       /* diagonal portion of A */
4536       ncols_d = ai[i+1] - ai[i];
4537       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4538       /* off-diagonal portion of A */
4539       for (j=jo; j<ncols_o; j++) {
4540         *cam++ = *ba++; bj++;
4541       }
4542     }
4543   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4544   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4545   PetscFunctionReturn(0);
4546 }
4547 
4548 #undef __FUNCT__
4549 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4550 /*@C
4551      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4552 
4553     Not Collective
4554 
4555    Input Parameters:
4556 +    A - the matrix
4557 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4558 -    row, col - index sets of rows and columns to extract (or NULL)
4559 
4560    Output Parameter:
4561 .    A_loc - the local sequential matrix generated
4562 
4563     Level: developer
4564 
4565 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
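   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix; passing NULL for row and col
   selects all local rows and all nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve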
4566 
4567 @*/
4568 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4569 {
4570   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4571   PetscErrorCode ierr;
4572   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4573   IS             isrowa,iscola;
4574   Mat            *aloc;
4575   PetscBool      match;
4576 
4577   PetscFunctionBegin;
4578   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4579   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4580   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4581   if (!row) {
4582     start = A->rmap->rstart; end = A->rmap->rend;
4583     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4584   } else {
4585     isrowa = *row;
4586   }
4587   if (!col) {
4588     start = A->cmap->rstart;
4589     cmap  = a->garray;
4590     nzA   = a->A->cmap->n;
4591     nzB   = a->B->cmap->n;
4592     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4593     ncols = 0;
4594     for (i=0; i<nzB; i++) {
4595       if (cmap[i] < start) idx[ncols++] = cmap[i];
4596       else break;
4597     }
4598     imark = i;
4599     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4600     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4601     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4602   } else {
4603     iscola = *col;
4604   }
4605   if (scall != MAT_INITIAL_MATRIX) {
4606     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4607     aloc[0] = *A_loc;
4608   }
4609   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4610   *A_loc = aloc[0];
4611   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4612   if (!row) {
4613     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4614   }
4615   if (!col) {
4616     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4617   }
4618   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4619   PetscFunctionReturn(0);
4620 }
4621 
4622 #undef __FUNCT__
4623 #define __FUNCT__ "MatGetBrowsOfAcols"
4624 /*@C
4625     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4626 
4627     Collective on Mat
4628 
4629    Input Parameters:
4630 +    A,B - the matrices in mpiaij format
4631 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4632 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4633 
4634    Output Parameter:
4635 +    rowb, colb - index sets of rows and columns of B to extract
4636 -    B_seq - the sequential matrix generated
4637 
4638     Level: developer
4639 
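   A minimal usage sketch (assuming A and B are assembled MPIAIJ matrices; with MAT_INITIAL_MATRIX the
   routine creates rowb, colb and B_seq, which can then be reused in later calls with MAT_REUSE_MATRIX):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve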
4640 @*/
4641 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4642 {
4643   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4644   PetscErrorCode ierr;
4645   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4646   IS             isrowb,iscolb;
4647   Mat            *bseq=NULL;
4648 
4649   PetscFunctionBegin;
4650   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4651     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4652   }
4653   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4654 
4655   if (scall == MAT_INITIAL_MATRIX) {
4656     start = A->cmap->rstart;
4657     cmap  = a->garray;
4658     nzA   = a->A->cmap->n;
4659     nzB   = a->B->cmap->n;
4660     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4661     ncols = 0;
4662     for (i=0; i<nzB; i++) {  /* row < local row index */
4663       if (cmap[i] < start) idx[ncols++] = cmap[i];
4664       else break;
4665     }
4666     imark = i;
4667     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4668     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4669     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4670     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4671   } else {
4672     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4673     isrowb  = *rowb; iscolb = *colb;
4674     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4675     bseq[0] = *B_seq;
4676   }
4677   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4678   *B_seq = bseq[0];
4679   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4680   if (!rowb) {
4681     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4682   } else {
4683     *rowb = isrowb;
4684   }
4685   if (!colb) {
4686     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4687   } else {
4688     *colb = iscolb;
4689   }
4690   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4691   PetscFunctionReturn(0);
4692 }
4693 
4694 #undef __FUNCT__
4695 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4696 /*
4697     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4698     of the OFF-DIAGONAL portion of local A
4699 
4700     Collective on Mat
4701 
4702    Input Parameters:
4703 +    A,B - the matrices in mpiaij format
4704 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4705 
4706    Output Parameter:
4707 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4708 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4709 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4710 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4711 
4712     Level: developer
4713 
4714 */
4715 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4716 {
4717   VecScatter_MPI_General *gen_to,*gen_from;
4718   PetscErrorCode         ierr;
4719   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4720   Mat_SeqAIJ             *b_oth;
4721   VecScatter             ctx =a->Mvctx;
4722   MPI_Comm               comm;
4723   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4724   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4725   PetscScalar            *rvalues,*svalues;
4726   MatScalar              *b_otha,*bufa,*bufA;
4727   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4728   MPI_Request            *rwaits = NULL,*swaits = NULL;
4729   MPI_Status             *sstatus,rstatus;
4730   PetscMPIInt            jj,size;
4731   PetscInt               *cols,sbs,rbs;
4732   PetscScalar            *vals;
4733 
4734   PetscFunctionBegin;
4735   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4736   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4737 
4738   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4739     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4740   }
4741   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4742   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4743 
4744   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4745   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4746   rvalues  = gen_from->values; /* will hold the lengths of the rows to be received */
4747   svalues  = gen_to->values;   /* will hold the lengths of the rows to be sent */
4748   nrecvs   = gen_from->n;
4749   nsends   = gen_to->n;
4750 
4751   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4752   srow    = gen_to->indices;    /* local row index to be sent */
4753   sstarts = gen_to->starts;
4754   sprocs  = gen_to->procs;
4755   sstatus = gen_to->sstatus;
4756   sbs     = gen_to->bs;
4757   rstarts = gen_from->starts;
4758   rprocs  = gen_from->procs;
4759   rbs     = gen_from->bs;
4760 
4761   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4762   if (scall == MAT_INITIAL_MATRIX) {
4763     /* i-array */
4764     /*---------*/
4765     /*  post receives */
4766     for (i=0; i<nrecvs; i++) {
4767       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4768       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4769       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4770     }
4771 
4772     /* pack the outgoing message */
4773     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4774 
4775     sstartsj[0] = 0;
4776     rstartsj[0] = 0;
4777     len         = 0; /* total length of j or a array to be sent */
4778     k           = 0;
4779     for (i=0; i<nsends; i++) {
4780       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4781       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4782       for (j=0; j<nrows; j++) {
4783         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4784         for (l=0; l<sbs; l++) {
4785           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4786 
4787           rowlen[j*sbs+l] = ncols;
4788 
4789           len += ncols;
4790           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4791         }
4792         k++;
4793       }
4794       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4795 
4796       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4797     }
4798     /* recvs and sends of i-array are completed */
4799     i = nrecvs;
4800     while (i--) {
4801       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4802     }
4803     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4804 
4805     /* allocate buffers for sending j and a arrays */
4806     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4807     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4808 
4809     /* create i-array of B_oth */
4810     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4811 
4812     b_othi[0] = 0;
4813     len       = 0; /* total length of j or a array to be received */
4814     k         = 0;
4815     for (i=0; i<nrecvs; i++) {
4816       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4817       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4818       for (j=0; j<nrows; j++) {
4819         b_othi[k+1] = b_othi[k] + rowlen[j];
4820         len        += rowlen[j]; k++;
4821       }
4822       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4823     }
4824 
4825     /* allocate space for the j and a arrays of B_oth */
4826     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4827     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4828 
4829     /* j-array */
4830     /*---------*/
4831     /*  post receives of j-array */
4832     for (i=0; i<nrecvs; i++) {
4833       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4834       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4835     }
4836 
4837     /* pack the outgoing message j-array */
4838     k = 0;
4839     for (i=0; i<nsends; i++) {
4840       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4841       bufJ  = bufj+sstartsj[i];
4842       for (j=0; j<nrows; j++) {
4843         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4844         for (ll=0; ll<sbs; ll++) {
4845           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4846           for (l=0; l<ncols; l++) {
4847             *bufJ++ = cols[l];
4848           }
4849           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4850         }
4851       }
4852       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4853     }
4854 
4855     /* recvs and sends of j-array are completed */
4856     i = nrecvs;
4857     while (i--) {
4858       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4859     }
4860     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4861   } else if (scall == MAT_REUSE_MATRIX) {
4862     sstartsj = *startsj_s;
4863     rstartsj = *startsj_r;
4864     bufa     = *bufa_ptr;
4865     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4866     b_otha   = b_oth->a;
4867   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4868 
4869   /* a-array */
4870   /*---------*/
4871   /*  post receives of a-array */
4872   for (i=0; i<nrecvs; i++) {
4873     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4874     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4875   }
4876 
4877   /* pack the outgoing message a-array */
4878   k = 0;
4879   for (i=0; i<nsends; i++) {
4880     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4881     bufA  = bufa+sstartsj[i];
4882     for (j=0; j<nrows; j++) {
4883       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4884       for (ll=0; ll<sbs; ll++) {
4885         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4886         for (l=0; l<ncols; l++) {
4887           *bufA++ = vals[l];
4888         }
4889         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4890       }
4891     }
4892     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4893   }
4894   /* recvs and sends of a-array are completed */
4895   i = nrecvs;
4896   while (i--) {
4897     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4898   }
4899   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4900   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4901 
4902   if (scall == MAT_INITIAL_MATRIX) {
4903     /* put together the new matrix */
4904     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4905 
4906     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4907     /* Since these are PETSc arrays, change flags to free them as necessary. */
4908     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4909     b_oth->free_a  = PETSC_TRUE;
4910     b_oth->free_ij = PETSC_TRUE;
4911     b_oth->nonew   = 0;
4912 
4913     ierr = PetscFree(bufj);CHKERRQ(ierr);
4914     if (!startsj_s || !bufa_ptr) {
4915       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4916       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4917     } else {
4918       *startsj_s = sstartsj;
4919       *startsj_r = rstartsj;
4920       *bufa_ptr  = bufa;
4921     }
4922   }
4923   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 #undef __FUNCT__
4928 #define __FUNCT__ "MatGetCommunicationStructs"
4929 /*@C
4930   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4931 
4932   Not Collective
4933 
4934   Input Parameters:
4935 . A - The matrix in mpiaij format
4936 
4937   Output Parameter:
4938 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4939 . colmap - A map from global column index to local index into lvec
4940 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4941 
4942   Level: developer
4943 
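  A minimal usage sketch (assuming A is an assembled MPIAIJ matrix; the type of colmap depends on
  whether PETSc was configured with ctable support, matching the prototypes below):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve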
4944 @*/
4945 #if defined(PETSC_USE_CTABLE)
4946 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4947 #else
4948 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4949 #endif
4950 {
4951   Mat_MPIAIJ *a;
4952 
4953   PetscFunctionBegin;
4954   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4955   PetscValidPointer(lvec, 2);
4956   PetscValidPointer(colmap, 3);
4957   PetscValidPointer(multScatter, 4);
4958   a = (Mat_MPIAIJ*) A->data;
4959   if (lvec) *lvec = a->lvec;
4960   if (colmap) *colmap = a->colmap;
4961   if (multScatter) *multScatter = a->Mvctx;
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4966 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4967 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4968 #if defined(PETSC_HAVE_ELEMENTAL)
4969 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4970 #endif
4971 
4972 #undef __FUNCT__
4973 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4974 /*
4975     Computes (B'*A')' since computing A*B directly is untenable
4976 
4977                n                       p                          p
4978         (              )       (              )         (                  )
4979       m (      A       )  *  n (       B      )   =   m (         C        )
4980         (              )       (              )         (                  )
4981 
4982 */
4983 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4984 {
4985   PetscErrorCode ierr;
4986   Mat            At,Bt,Ct;
4987 
4988   PetscFunctionBegin;
4989   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4990   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4991   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4992   ierr = MatDestroy(&At);CHKERRQ(ierr);
4993   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4994   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4995   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4996   PetscFunctionReturn(0);
4997 }
4998 
4999 #undef __FUNCT__
5000 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5001 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5002 {
5003   PetscErrorCode ierr;
5004   PetscInt       m=A->rmap->n,n=B->cmap->n;
5005   Mat            Cmat;
5006 
5007   PetscFunctionBegin;
5008   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5009   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5010   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5011   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5012   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5013   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5014   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5015   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5016 
5017   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5018 
5019   *C = Cmat;
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 /* ----------------------------------------------------------------*/
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5026 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5027 {
5028   PetscErrorCode ierr;
5029 
5030   PetscFunctionBegin;
5031   if (scall == MAT_INITIAL_MATRIX) {
5032     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5033     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5034     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5035   }
5036   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5037   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5038   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5039   PetscFunctionReturn(0);
5040 }
5041 
5042 /*MC
5043    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5044 
5045    Options Database Keys:
5046 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5047 
5048   Level: beginner
5049 
5050 .seealso: MatCreateAIJ()
5051 M*/
5052 
5053 #undef __FUNCT__
5054 #define __FUNCT__ "MatCreate_MPIAIJ"
5055 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5056 {
5057   Mat_MPIAIJ     *b;
5058   PetscErrorCode ierr;
5059   PetscMPIInt    size;
5060 
5061   PetscFunctionBegin;
5062   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5063 
5064   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5065   B->data       = (void*)b;
5066   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5067   B->assembled  = PETSC_FALSE;
5068   B->insertmode = NOT_SET_VALUES;
5069   b->size       = size;
5070 
5071   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5072 
5073   /* build cache for off array entries formed */
5074   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5075 
5076   b->donotstash  = PETSC_FALSE;
5077   b->colmap      = 0;
5078   b->garray      = 0;
5079   b->roworiented = PETSC_TRUE;
5080 
5081   /* stuff used for matrix vector multiply */
5082   b->lvec  = NULL;
5083   b->Mvctx = NULL;
5084 
5085   /* stuff for MatGetRow() */
5086   b->rowindices   = 0;
5087   b->rowvalues    = 0;
5088   b->getrowactive = PETSC_FALSE;
5089 
5090   /* flexible pointer used in CUSP/CUSPARSE classes */
5091   b->spptr = NULL;
5092 
5093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5103 #if defined(PETSC_HAVE_ELEMENTAL)
5104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5105 #endif
5106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5109   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5110   PetscFunctionReturn(0);
5111 }
5112 
5113 #undef __FUNCT__
5114 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5115 /*@C
5116      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5117          and "off-diagonal" parts of the matrix in CSR format.
5118 
5119    Collective on MPI_Comm
5120 
5121    Input Parameters:
5122 +  comm - MPI communicator
5123 .  m - number of local rows (cannot be PETSC_DECIDE)
5124 .  n - number of local columns; this should be the same as the local size used in creating the
5125        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5126        calculated if N is given). For square matrices n is almost always m.
5127 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5128 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5129 .   i - row indices for "diagonal" portion of matrix
5130 .   j - column indices for "diagonal" portion of matrix
5131 .   a - matrix values for "diagonal" portion of matrix
5132 .   oi - row indices for "off-diagonal" portion of matrix
5133 .   oj - column indices for "off-diagonal" portion of matrix
5134 -   oa - matrix values for "off-diagonal" portion of matrix
5135 
5136    Output Parameter:
5137 .   mat - the matrix
5138 
5139    Level: advanced
5140 
5141    Notes:
5142        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5143        is responsible for freeing these arrays, but must not do so until the matrix has been destroyed.
5144
5145        The i, j, oi, and oj indices are 0 based
5146
5147        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5148 
5149        This sets local rows and cannot be used to set off-processor values.
5150 
5151        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5152        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5153        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5154        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5155        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5156        communication if it is known that only local entries will be set.
5157 
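   Example usage (an illustrative sketch only): assume exactly two MPI processes, each owning 2 rows and
   2 columns of a 4 by 4 matrix. Each process places one entry per row in its diagonal block and a single
   entry in the second row of its off-diagonal block; as with MatCreateAIJ(), the off-diagonal column
   indices oj[] are given here as global column numbers, referring to a column owned by the other process.
.vb
      PetscMPIInt    rank;
      PetscInt       i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar    a[]  = {1.0,2.0};
      PetscInt       oi[] = {0,0,1},  oj[1];
      PetscScalar    oa[] = {3.0};
      Mat            A;
      PetscErrorCode ierr;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 2;
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
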
5158 .keywords: matrix, aij, compressed row, sparse, parallel
5159 
5160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5161           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5162 @*/
5163 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5164 {
5165   PetscErrorCode ierr;
5166   Mat_MPIAIJ     *maij;
5167 
5168   PetscFunctionBegin;
5169   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5170   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5171   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5172   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5173   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5174   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5175   maij = (Mat_MPIAIJ*) (*mat)->data;
5176 
5177   (*mat)->preallocated = PETSC_TRUE;
5178 
5179   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5180   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5181 
5182   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5183   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5184 
5185   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5186   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5187   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5188   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5189 
5190   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5191   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5192   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5193   PetscFunctionReturn(0);
5194 }
5195 
5196 /*
5197     Special version of MatSetValues() for direct calls from Fortran, bypassing the usual Fortran interface stub
5198 */
5199 #include <petsc/private/fortranimpl.h>
5200 
5201 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5202 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5203 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5204 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5205 #endif
5206 
5207 /* Redefine these macros so they can be used in a void function: the Fortran-callable routine below cannot return an error code, so errors abort instead */
5208 #undef CHKERRQ
5209 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5210 #undef SETERRQ2
5211 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5212 #undef SETERRQ3
5213 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5214 #undef SETERRQ
5215 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5216 
5217 #undef __FUNCT__
5218 #define __FUNCT__ "matsetvaluesmpiaij_"
5219 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5220 {
5221   Mat            mat  = *mmat;
5222   PetscInt       m    = *mm, n = *mn;
5223   InsertMode     addv = *maddv;
5224   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5225   PetscScalar    value;
5226   PetscErrorCode ierr;
5227 
5228   MatCheckPreallocated(mat,1);
5229   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5230 
5231 #if defined(PETSC_USE_DEBUG)
5232   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5233 #endif
5234   {
5235     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5236     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5237     PetscBool roworiented = aij->roworiented;
5238 
5239     /* variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5240     Mat        A                 = aij->A;
5241     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5242     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5243     MatScalar  *aa               = a->a;
5244     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5245     Mat        B                 = aij->B;
5246     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5247     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5248     MatScalar  *ba               = b->a;
5249 
5250     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5251     PetscInt  nonew = a->nonew;
5252     MatScalar *ap1,*ap2;
5253 
5254     PetscFunctionBegin;
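    /* For each row to be set: negative row indices are ignored; rows owned by this process are inserted
       directly into the diagonal (A) or off-diagonal (B) block below; rows owned by other processes are
       stashed and communicated later during MatAssemblyBegin()/MatAssemblyEnd() */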
5255     for (i=0; i<m; i++) {
5256       if (im[i] < 0) continue;
5257 #if defined(PETSC_USE_DEBUG)
5258       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5259 #endif
5260       if (im[i] >= rstart && im[i] < rend) {
5261         row      = im[i] - rstart;
5262         lastcol1 = -1;
5263         rp1      = aj + ai[row];
5264         ap1      = aa + ai[row];
5265         rmax1    = aimax[row];
5266         nrow1    = ailen[row];
5267         low1     = 0;
5268         high1    = nrow1;
5269         lastcol2 = -1;
5270         rp2      = bj + bi[row];
5271         ap2      = ba + bi[row];
5272         rmax2    = bimax[row];
5273         nrow2    = bilen[row];
5274         low2     = 0;
5275         high2    = nrow2;
5276 
5277         for (j=0; j<n; j++) {
5278           if (roworiented) value = v[i*n+j];
5279           else value = v[i+j*m];
5280           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5281           if (in[j] >= cstart && in[j] < cend) {
5282             col = in[j] - cstart;
5283             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5284           } else if (in[j] < 0) continue;
5285 #if defined(PETSC_USE_DEBUG)
5286           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5287 #endif
5288           else {
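            /* entry belongs to the off-diagonal (B) block: once the matrix has been assembled, B uses a
               compacted local column numbering, so translate the global column through colmap; if the
               column is not present yet and new nonzero locations are allowed, disassemble B back to
               global column numbering first.  Before the first assembly B still uses global columns. */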
5289             if (mat->was_assembled) {
5290               if (!aij->colmap) {
5291                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5292               }
5293 #if defined(PETSC_USE_CTABLE)
5294               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5295               col--;
5296 #else
5297               col = aij->colmap[in[j]] - 1;
5298 #endif
5299               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5300                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5301                 col  =  in[j];
5302                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5303                 B     = aij->B;
5304                 b     = (Mat_SeqAIJ*)B->data;
5305                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5306                 rp2   = bj + bi[row];
5307                 ap2   = ba + bi[row];
5308                 rmax2 = bimax[row];
5309                 nrow2 = bilen[row];
5310                 low2  = 0;
5311                 high2 = nrow2;
5312                 bm    = aij->B->rmap->n;
5313                 ba    = b->a;
5314               }
5315             } else col = in[j];
5316             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5317           }
5318         }
5319       } else if (!aij->donotstash) {
5320         if (roworiented) {
5321           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5322         } else {
5323           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5324         }
5325       }
5326     }
5327   }
5328   PetscFunctionReturnVoid();
5329 }
5330 
5331