xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 17eaebbeecbcead2931bf7ab9dfe4858aa77ab07)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petsc-private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
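   Example Usage (a minimal sketch; comm, the sizes m,n,M,N, and the preallocation
   counts nz,d_nz,o_nz are placeholders, and error checking is omitted):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,nz,NULL);              used when comm has a single process
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  used when comm has multiple processes
     then fill the matrix with MatSetValues() and call MatAssemblyBegin()/MatAssemblyEnd()
.ve
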
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically switches over to using inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
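  /* Two passes over the local rows: the first counts rows whose stored entries (in both the
     diagonal A and off-diagonal B blocks) are all zero; if no process has such a row the
     routine returns with *keptrows = NULL (keep everything), otherwise a second pass collects
     the global indices of the rows that are to be kept */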
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
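  /* Accumulate this process's contribution to every global column in a dense work array of
     global length n, then combine the arrays across processes with MPI_Allreduce (MPIU_SUM for
     the 1- and 2-norms, MPIU_MAX for the infinity norm); columns of the off-diagonal block B
     are mapped back to global indices through garray */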
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
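
    A minimal calling sketch (the names are illustrative): distribute once with
    MAT_INITIAL_MATRIX, then push updated numerical values from the rank-0 sequential
    matrix with MAT_REUSE_MATRIX:

       MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);
       (change the numerical values, but not the nonzero pattern, of gmat on rank 0)
       MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);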
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal nonzeros in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
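      /* ld[i] = number of entries in local row i whose global column lies to the left of this
         process's diagonal block; it is stashed in the Mat_MPIAIJ so that the MAT_REUSE_MATRIX
         path below can split an incoming contiguous stream of values between the A and B blocks
         without resending column indices */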
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal nonzeros in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local column number in the off-diagonal part (B) of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
401 a slightly higher hash-table lookup cost; without it, it is not scalable (each
402 process stores an integer array whose length is the global number of columns), but access is fast.
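
   For example (illustrative numbers, using the plain-array variant): if garray = {3,8,12}
   then global column 8 is local column 1 of the off-diagonal block B, and colmap[8] = 2 is
   stored (local index plus one) so that a lookup returning 0 means the global column has no
   local copy.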
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
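/*
   The two macros below insert a single value at (row,col) into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block: a coarse bisection narrows the search window within the
   row's sorted column indices, a linear scan then locates the exact slot, and if the entry
   is new (and new nonzeros are allowed) the later entries of the row are shifted up to make
   room, reallocating the row with MatSeqXAIJReallocateAIJ() when necessary.
*/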
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
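  /* v must hold the entire row in ascending global column order: first the off-diagonal (B)
     entries to the left of this process's diagonal block, then the diagonal (A) block entries,
     then the remaining off-diagonal entries; the three PetscMemcpy() calls below split it
     accordingly */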
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
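  /* Entries for rows owned by this process are inserted directly into the diagonal (A) or
     off-diagonal (B) block through the macros above (disassembling the matrix first if a
     previously unknown off-diagonal column appears after assembly); entries for rows owned
     by other processes are stashed and communicated during assembly unless donotstash is set */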
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES; the two modes occupy different bits, so the bitwise OR across processes equals (ADD_VALUES|INSERT_VALUES) exactly when both were used */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled, if so we must
742      also disassemble ourselves, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero mat->B before mat->A because the (diag != 0.0) rectangular case below may put values into mat->B */
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
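  /* build a mask over the ghosted (off-diagonal) columns: put 1.0 in the local positions of the
     zeroed rows and scatter forward with ADD_VALUES so that every process learns which of its
     ghost columns correspond to zeroed global rows/columns; those entries of B are then zeroed,
     adjusting the right-hand side b from x where requested */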
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
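  /* overlap communication with computation: start scattering the needed ghost entries of xx,
     multiply by the local diagonal block, complete the scatter, then add in the off-diagonal
     block applied to the ghost values */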
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added into yy until the VecScatterEnd() below */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: symmetric diagonal block */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
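  /* The file is written in the standard PETSc binary matrix format: a 4-entry header
     (MAT_FILE_CLASSID, global rows, global columns, total number of nonzeros), then every row
     length, then all global column indices row by row, then all numerical values; rank 0
     receives each piece from the other processes under viewer flow control and writes it */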
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
1325 
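/*
   A minimal usage sketch (not part of the original source) of the binary viewer path
   implemented above. The file name "A.dat" is illustrative, and A is assumed to be an
   assembled MATMPIAIJ matrix inside a normal PETSc program with ierr declared.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The companion .info file receives the -matload_block_size hint written at the end of
   MatView_MPIAIJ_Binary(), so a later MatLoad() can pick up the block size automatically.
*/
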
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401     const char *matname;
1402 
1403     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1404     if (!rank) {
1405       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1406     } else {
1407       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1408     }
1409     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1410     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1411     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1412     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1413     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1414 
1415     /* copy over the A part */
1416     Aloc = (Mat_SeqAIJ*)aij->A->data;
1417     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1418     row  = mat->rmap->rstart;
1419     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1420     for (i=0; i<m; i++) {
1421       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1422       row++;
1423       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1424     }
1425     aj = Aloc->j;
1426     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1427 
1428     /* copy over the B part */
1429     Aloc = (Mat_SeqAIJ*)aij->B->data;
1430     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1431     row  = mat->rmap->rstart;
1432     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1433     ct   = cols;
1434     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1435     for (i=0; i<m; i++) {
1436       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1437       row++;
1438       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1439     }
1440     ierr = PetscFree(ct);CHKERRQ(ierr);
1441     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1443     /*
1444        Everyone has to participate in this call to draw the matrix since the graphics
1445        waits are synchronized across all processors that share the PetscDraw object
1446     */
1447     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1448     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1449     if (!rank) {
1450       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1451       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1452     }
1453     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1454     ierr = MatDestroy(&A);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatView_MPIAIJ"
1461 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1462 {
1463   PetscErrorCode ierr;
1464   PetscBool      iascii,isdraw,issocket,isbinary;
1465 
1466   PetscFunctionBegin;
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1471   if (iascii || isdraw || isbinary || issocket) {
1472     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
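/*
   A usage sketch (A is assumed to be an assembled MATMPIAIJ matrix) showing how the
   ASCII info-detail branch of MatView_MPIAIJ_ASCIIorDraworSocket() is normally reached:

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, from the command line, an option such as -mat_view ::ascii_info_detail after assembly.
*/
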
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatSOR_MPIAIJ"
1479 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1480 {
1481   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1482   PetscErrorCode ierr;
1483   Vec            bb1 = 0;
1484   PetscBool      hasop;
1485 
1486   PetscFunctionBegin;
1487   if (flag == SOR_APPLY_UPPER) {
1488     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1489     PetscFunctionReturn(0);
1490   }
1491 
1492   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1493     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1494   }
1495 
1496   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1497     if (flag & SOR_ZERO_INITIAL_GUESS) {
1498       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1499       its--;
1500     }
1501 
1502     while (its--) {
1503       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505 
1506       /* update rhs: bb1 = bb - B*x */
1507       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1508       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1509 
1510       /* local sweep */
1511       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1512     }
1513   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1514     if (flag & SOR_ZERO_INITIAL_GUESS) {
1515       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1516       its--;
1517     }
1518     while (its--) {
1519       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521 
1522       /* update rhs: bb1 = bb - B*x */
1523       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1524       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1525 
1526       /* local sweep */
1527       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1528     }
1529   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1530     if (flag & SOR_ZERO_INITIAL_GUESS) {
1531       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1532       its--;
1533     }
1534     while (its--) {
1535       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537 
1538       /* update rhs: bb1 = bb - B*x */
1539       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1540       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1541 
1542       /* local sweep */
1543       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1544     }
1545   } else if (flag & SOR_EISENSTAT) {
1546     Vec xx1;
1547 
1548     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1549     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1550 
1551     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553     if (!mat->diag) {
1554       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1555       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1556     }
1557     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1558     if (hasop) {
1559       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1560     } else {
1561       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1562     }
1563     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1564 
1565     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1566 
1567     /* local sweep */
1568     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1569     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1570     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1571   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1572 
1573   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
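/*
   A sketch (assuming A, b, x are an assembled MATMPIAIJ matrix and conforming vectors)
   of calling the local SOR kernel above directly; most users reach it through
   -pc_type sor instead:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   Only the SOR_LOCAL_* (processor-local) sweeps and SOR_EISENSTAT are supported here;
   a true parallel sweep triggers the PETSC_ERR_SUP error above.
*/
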
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatPermute_MPIAIJ"
1579 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1580 {
1581   Mat            aA,aB,Aperm;
1582   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1583   PetscScalar    *aa,*ba;
1584   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1585   PetscSF        rowsf,sf;
1586   IS             parcolp = NULL;
1587   PetscBool      done;
1588   PetscErrorCode ierr;
1589 
1590   PetscFunctionBegin;
1591   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1592   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1593   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1594   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1595 
1596   /* Invert row permutation to find out where my rows should go */
1597   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1598   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1599   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1600   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1601   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1602   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1603 
1604   /* Invert column permutation to find out where my columns should go */
1605   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1606   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1607   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1608   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1609   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1610   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1611   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1612 
1613   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1614   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1615   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1616 
1617   /* Find out where my gcols should go */
1618   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1619   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1620   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1621   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1622   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1623   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1624   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1625   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1626 
1627   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1628   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1629   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1630   for (i=0; i<m; i++) {
1631     PetscInt row = rdest[i],rowner;
1632     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1633     for (j=ai[i]; j<ai[i+1]; j++) {
1634       PetscInt cowner,col = cdest[aj[j]];
1635       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1636       if (rowner == cowner) dnnz[i]++;
1637       else onnz[i]++;
1638     }
1639     for (j=bi[i]; j<bi[i+1]; j++) {
1640       PetscInt cowner,col = gcdest[bj[j]];
1641       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1642       if (rowner == cowner) dnnz[i]++;
1643       else onnz[i]++;
1644     }
1645   }
1646   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1650   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1651 
1652   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1653   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1654   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1655   for (i=0; i<m; i++) {
1656     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1657     PetscInt j0,rowlen;
1658     rowlen = ai[i+1] - ai[i];
1659     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the scratch-array length m, so insert in batches */
1660       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1661       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1662     }
1663     rowlen = bi[i+1] - bi[i];
1664     for (j0=j=0; j<rowlen; j0=j) {
1665       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1666       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1667     }
1668   }
1669   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1672   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1673   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1674   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1675   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1676   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1677   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1678   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1679   *B = Aperm;
1680   PetscFunctionReturn(0);
1681 }
1682 
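/*
   A sketch of driving MatPermute() from user code. The index sets here encode the
   identity permutation over each process's ownership range purely for illustration,
   and A is assumed to be an assembled MATMPIAIJ matrix.

     IS       rowp,colp;
     Mat      B;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/
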
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1685 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1686 {
1687   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1688   Mat            A    = mat->A,B = mat->B;
1689   PetscErrorCode ierr;
1690   PetscReal      isend[5],irecv[5];
1691 
1692   PetscFunctionBegin;
1693   info->block_size = 1.0;
1694   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1695 
1696   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1697   isend[3] = info->memory;  isend[4] = info->mallocs;
1698 
1699   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1700 
1701   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1702   isend[3] += info->memory;  isend[4] += info->mallocs;
1703   if (flag == MAT_LOCAL) {
1704     info->nz_used      = isend[0];
1705     info->nz_allocated = isend[1];
1706     info->nz_unneeded  = isend[2];
1707     info->memory       = isend[3];
1708     info->mallocs      = isend[4];
1709   } else if (flag == MAT_GLOBAL_MAX) {
1710     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1711 
1712     info->nz_used      = irecv[0];
1713     info->nz_allocated = irecv[1];
1714     info->nz_unneeded  = irecv[2];
1715     info->memory       = irecv[3];
1716     info->mallocs      = irecv[4];
1717   } else if (flag == MAT_GLOBAL_SUM) {
1718     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1719 
1720     info->nz_used      = irecv[0];
1721     info->nz_allocated = irecv[1];
1722     info->nz_unneeded  = irecv[2];
1723     info->memory       = irecv[3];
1724     info->mallocs      = irecv[4];
1725   }
1726   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1727   info->fill_ratio_needed = 0;
1728   info->factor_mallocs    = 0;
1729   PetscFunctionReturn(0);
1730 }
1731 
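/*
   A sketch (A assumed to be an assembled MATMPIAIJ matrix) of querying the summed
   storage statistics that MatGetInfo_MPIAIJ() assembles with MPI_Allreduce above:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
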
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatSetOption_MPIAIJ"
1734 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1735 {
1736   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   switch (op) {
1741   case MAT_NEW_NONZERO_LOCATIONS:
1742   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1743   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1744   case MAT_KEEP_NONZERO_PATTERN:
1745   case MAT_NEW_NONZERO_LOCATION_ERR:
1746   case MAT_USE_INODES:
1747   case MAT_IGNORE_ZERO_ENTRIES:
1748     MatCheckPreallocated(A,1);
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_ROW_ORIENTED:
1753     a->roworiented = flg;
1754 
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1757     break;
1758   case MAT_NEW_DIAGONALS:
1759     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1760     break;
1761   case MAT_IGNORE_OFF_PROC_ENTRIES:
1762     a->donotstash = flg;
1763     break;
1764   case MAT_SPD:
1765     A->spd_set = PETSC_TRUE;
1766     A->spd     = flg;
1767     if (flg) {
1768       A->symmetric                  = PETSC_TRUE;
1769       A->structurally_symmetric     = PETSC_TRUE;
1770       A->symmetric_set              = PETSC_TRUE;
1771       A->structurally_symmetric_set = PETSC_TRUE;
1772     }
1773     break;
1774   case MAT_SYMMETRIC:
1775     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1776     break;
1777   case MAT_STRUCTURALLY_SYMMETRIC:
1778     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1779     break;
1780   case MAT_HERMITIAN:
1781     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1782     break;
1783   case MAT_SYMMETRY_ETERNAL:
1784     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1785     break;
1786   default:
1787     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1788   }
1789   PetscFunctionReturn(0);
1790 }
1791 
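/*
   A sketch of typical option settings that reach MatSetOption_MPIAIJ(); A is an
   assumption (an MPIAIJ matrix created and preallocated elsewhere), and the options
   shown are just two of the cases handled in the switch above:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/
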
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatGetRow_MPIAIJ"
1794 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1795 {
1796   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1797   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1798   PetscErrorCode ierr;
1799   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1800   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1801   PetscInt       *cmap,*idx_p;
1802 
1803   PetscFunctionBegin;
1804   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1805   mat->getrowactive = PETSC_TRUE;
1806 
1807   if (!mat->rowvalues && (idx || v)) {
1808     /*
1809         allocate enough space to hold information from the longest row.
1810     */
1811     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1812     PetscInt   max = 1,tmp;
1813     for (i=0; i<matin->rmap->n; i++) {
1814       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1815       if (max < tmp) max = tmp;
1816     }
1817     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1818   }
1819 
1820   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1821   lrow = row - rstart;
1822 
1823   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1824   if (!v)   {pvA = 0; pvB = 0;}
1825   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1826   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1827   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1828   nztot = nzA + nzB;
1829 
1830   cmap = mat->garray;
1831   if (v  || idx) {
1832     if (nztot) {
1833       /* Sort by increasing column numbers, assuming A and B already sorted */
1834       PetscInt imark = -1;
1835       if (v) {
1836         *v = v_p = mat->rowvalues;
1837         for (i=0; i<nzB; i++) {
1838           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1839           else break;
1840         }
1841         imark = i;
1842         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1843         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1844       }
1845       if (idx) {
1846         *idx = idx_p = mat->rowindices;
1847         if (imark > -1) {
1848           for (i=0; i<imark; i++) {
1849             idx_p[i] = cmap[cworkB[i]];
1850           }
1851         } else {
1852           for (i=0; i<nzB; i++) {
1853             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1854             else break;
1855           }
1856           imark = i;
1857         }
1858         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1859         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1860       }
1861     } else {
1862       if (idx) *idx = 0;
1863       if (v)   *v   = 0;
1864     }
1865   }
1866   *nz  = nztot;
1867   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1868   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1874 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1875 {
1876   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1877 
1878   PetscFunctionBegin;
1879   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1880   aij->getrowactive = PETSC_FALSE;
1881   PetscFunctionReturn(0);
1882 }
1883 
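/*
   A sketch of the standard MatGetRow()/MatRestoreRow() loop served by the two routines
   above; A is assumed assembled, and only locally owned rows may be requested, which is
   why the loop runs over the ownership range.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols, vals for this row only ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
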
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatNorm_MPIAIJ"
1886 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1887 {
1888   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1889   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1890   PetscErrorCode ierr;
1891   PetscInt       i,j,cstart = mat->cmap->rstart;
1892   PetscReal      sum = 0.0;
1893   MatScalar      *v;
1894 
1895   PetscFunctionBegin;
1896   if (aij->size == 1) {
1897     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1898   } else {
1899     if (type == NORM_FROBENIUS) {
1900       v = amat->a;
1901       for (i=0; i<amat->nz; i++) {
1902         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1903       }
1904       v = bmat->a;
1905       for (i=0; i<bmat->nz; i++) {
1906         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1907       }
1908       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1909       *norm = PetscSqrtReal(*norm);
1910     } else if (type == NORM_1) { /* max column norm */
1911       PetscReal *tmp,*tmp2;
1912       PetscInt  *jj,*garray = aij->garray;
1913       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1914       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1915       *norm = 0.0;
1916       v     = amat->a; jj = amat->j;
1917       for (j=0; j<amat->nz; j++) {
1918         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1919       }
1920       v = bmat->a; jj = bmat->j;
1921       for (j=0; j<bmat->nz; j++) {
1922         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1923       }
1924       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1925       for (j=0; j<mat->cmap->N; j++) {
1926         if (tmp2[j] > *norm) *norm = tmp2[j];
1927       }
1928       ierr = PetscFree(tmp);CHKERRQ(ierr);
1929       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1930     } else if (type == NORM_INFINITY) { /* max row norm */
1931       PetscReal ntemp = 0.0;
1932       for (j=0; j<aij->A->rmap->n; j++) {
1933         v   = amat->a + amat->i[j];
1934         sum = 0.0;
1935         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1936           sum += PetscAbsScalar(*v); v++;
1937         }
1938         v = bmat->a + bmat->i[j];
1939         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1940           sum += PetscAbsScalar(*v); v++;
1941         }
1942         if (sum > ntemp) ntemp = sum;
1943       }
1944       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1945     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
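/*
   A sketch (A assumed assembled) exercising the three norms implemented above; NORM_2
   deliberately raises PETSC_ERR_SUP in MatNorm_MPIAIJ().

     PetscReal n1,nf,ninf;
     ierr = MatNorm(A,NORM_1,&n1);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&ninf);CHKERRQ(ierr);
*/
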
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatTranspose_MPIAIJ"
1952 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1953 {
1954   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1955   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1956   PetscErrorCode ierr;
1957   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1958   PetscInt       cstart = A->cmap->rstart,ncol;
1959   Mat            B;
1960   MatScalar      *array;
1961 
1962   PetscFunctionBegin;
1963   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1964 
1965   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1966   ai = Aloc->i; aj = Aloc->j;
1967   bi = Bloc->i; bj = Bloc->j;
1968   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1969     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1970     PetscSFNode          *oloc;
1971     PETSC_UNUSED PetscSF sf;
1972 
1973     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1974     /* compute d_nnz for preallocation */
1975     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1976     for (i=0; i<ai[ma]; i++) {
1977       d_nnz[aj[i]]++;
1978       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1979     }
1980     /* compute local off-diagonal contributions */
1981     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1982     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1983     /* map those to global */
1984     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1985     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1986     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1987     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1989     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1990     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1991 
1992     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1993     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1994     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1995     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1996     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1997     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1998   } else {
1999     B    = *matout;
2000     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2001     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2002   }
2003 
2004   /* copy over the A part */
2005   array = Aloc->a;
2006   row   = A->rmap->rstart;
2007   for (i=0; i<ma; i++) {
2008     ncol = ai[i+1]-ai[i];
2009     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2010     row++;
2011     array += ncol; aj += ncol;
2012   }
2013   aj = Aloc->j;
2014   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2015 
2016   /* copy over the B part */
2017   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2018   array = Bloc->a;
2019   row   = A->rmap->rstart;
2020   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2021   cols_tmp = cols;
2022   for (i=0; i<mb; i++) {
2023     ncol = bi[i+1]-bi[i];
2024     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2025     row++;
2026     array += ncol; cols_tmp += ncol;
2027   }
2028   ierr = PetscFree(cols);CHKERRQ(ierr);
2029 
2030   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2033     *matout = B;
2034   } else {
2035     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
2039 
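/*
   A sketch of the two MatTranspose() modes handled above; A is assumed to be an
   assembled MATMPIAIJ matrix.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   create a new transpose
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);     refill it after A changes

   In-place transposition, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only allowed for
   square matrices, as the size check at the top of the routine enforces.
*/
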
2040 #undef __FUNCT__
2041 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2042 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2043 {
2044   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2045   Mat            a    = aij->A,b = aij->B;
2046   PetscErrorCode ierr;
2047   PetscInt       s1,s2,s3;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2051   if (rr) {
2052     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2053     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2054     /* Overlap communication with computation. */
2055     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2056   }
2057   if (ll) {
2058     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2059     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2060     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2061   }
2062   /* scale the diagonal block */
2063   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2064 
2065   if (rr) {
2066     /* Do a scatter end and then right scale the off-diagonal block */
2067     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
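/*
   A sketch (A assumed assembled) of left/right diagonal scaling, which the routine above
   overlaps with the scatter of the right-scaling vector to the off-diagonal block:

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);   computes A <- diag(l) A diag(r)
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
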
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2075 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2076 {
2077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2078   PetscErrorCode ierr;
2079 
2080   PetscFunctionBegin;
2081   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatEqual_MPIAIJ"
2087 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2088 {
2089   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2090   Mat            a,b,c,d;
2091   PetscBool      flg;
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   a = matA->A; b = matA->B;
2096   c = matB->A; d = matB->B;
2097 
2098   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2099   if (flg) {
2100     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2101   }
2102   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatCopy_MPIAIJ"
2108 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2109 {
2110   PetscErrorCode ierr;
2111   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2112   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2113 
2114   PetscFunctionBegin;
2115   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2116   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2117     /* because of the column compression in the off-processor part of the matrix a->B,
2118        the number of columns in a->B and b->B may be different, hence we cannot call
2119        the MatCopy() directly on the two parts. If need be, we can provide a more
2120        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2121        then copying the submatrices */
2122     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2123   } else {
2124     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2125     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2126   }
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatSetUp_MPIAIJ"
2132 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2133 {
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 /*
2142    Computes the number of nonzeros per row needed for preallocation when X and Y
2143    have different nonzero structure.
2144 */
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2147 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2148 {
2149   PetscInt       i,j,k,nzx,nzy;
2150 
2151   PetscFunctionBegin;
2152   /* Set the number of nonzeros in the new matrix */
2153   for (i=0; i<m; i++) {
2154     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2155     nzx = xi[i+1] - xi[i];
2156     nzy = yi[i+1] - yi[i];
2157     nnz[i] = 0;
2158     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2159       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2160       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2161       nnz[i]++;
2162     }
2163     for (; k<nzy; k++) nnz[i]++;
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2171 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2172 {
2173   PetscErrorCode ierr;
2174   PetscInt       m = Y->rmap->N;
2175   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2176   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2177 
2178   PetscFunctionBegin;
2179   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 #undef __FUNCT__
2184 #define __FUNCT__ "MatAXPY_MPIAIJ"
2185 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2186 {
2187   PetscErrorCode ierr;
2188   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2189   PetscBLASInt   bnz,one=1;
2190   Mat_SeqAIJ     *x,*y;
2191 
2192   PetscFunctionBegin;
2193   if (str == SAME_NONZERO_PATTERN) {
2194     PetscScalar alpha = a;
2195     x    = (Mat_SeqAIJ*)xx->A->data;
2196     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2197     y    = (Mat_SeqAIJ*)yy->A->data;
2198     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2199     x    = (Mat_SeqAIJ*)xx->B->data;
2200     y    = (Mat_SeqAIJ*)yy->B->data;
2201     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2202     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2203     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2204   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2205     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2206   } else {
2207     Mat      B;
2208     PetscInt *nnz_d,*nnz_o;
2209     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2210     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2211     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2212     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2213     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2214     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2215     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2216     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2217     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2218     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2219     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2220     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2221     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2222     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2223   }
2224   PetscFunctionReturn(0);
2225 }
2226 
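/*
   A sketch (X and Y assumed to be assembled MATMPIAIJ matrices of the same sizes) of the
   three MatAXPY() paths above: the BLAS fast path, the subset path, and the general path
   that builds a freshly preallocated matrix.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   Each call computes Y <- Y + 2.0*X; only the preallocation strategy differs.
*/
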
2227 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatConjugate_MPIAIJ"
2231 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2232 {
2233 #if defined(PETSC_USE_COMPLEX)
2234   PetscErrorCode ierr;
2235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2236 
2237   PetscFunctionBegin;
2238   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2239   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2240 #else
2241   PetscFunctionBegin;
2242 #endif
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatRealPart_MPIAIJ"
2248 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2249 {
2250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2251   PetscErrorCode ierr;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2255   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2261 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2262 {
2263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2268   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #if defined(PETSC_HAVE_PBGL)
2273 
2274 #include <boost/parallel/mpi/bsp_process_group.hpp>
2275 #include <boost/graph/distributed/ilu_default_graph.hpp>
2276 #include <boost/graph/distributed/ilu_0_block.hpp>
2277 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2278 #include <boost/graph/distributed/petsc/interface.hpp>
2279 #include <boost/multi_array.hpp>
2280 #include <boost/parallel/distributed_property_map.hpp>
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2284 /*
2285   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2286 */
2287 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2288 {
2289   namespace petsc = boost::distributed::petsc;
2290 
2291   namespace graph_dist = boost::graph::distributed;
2292   using boost::graph::distributed::ilu_default::process_group_type;
2293   using boost::graph::ilu_permuted;
2294 
2295   PetscBool      row_identity, col_identity;
2296   PetscContainer c;
2297   PetscInt       m, n, M, N;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2302   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2303   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2304   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2305 
2306   process_group_type pg;
2307   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2308   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2309   lgraph_type& level_graph = *lgraph_p;
2310   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2311 
2312   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2313   ilu_permuted(level_graph);
2314 
2315   /* put together the new matrix */
2316   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2317   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2318   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2319   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2320   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2321   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2322   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2323   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2324 
2325   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2326   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2327   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2328   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2334 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2335 {
2336   PetscFunctionBegin;
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatSolve_MPIAIJ"
2342 /*
2343   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2344 */
2345 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2346 {
2347   namespace graph_dist = boost::graph::distributed;
2348 
2349   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2350   lgraph_type    *lgraph_p;
2351   PetscContainer c;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2356   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2357   ierr = VecCopy(b, x);CHKERRQ(ierr);
2358 
2359   PetscScalar *array_x;
2360   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2361   PetscInt sx;
2362   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2363 
2364   PetscScalar *array_b;
2365   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2366   PetscInt sb;
2367   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2368 
2369   lgraph_type& level_graph = *lgraph_p;
2370   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2371 
2372   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2373   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2374   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2375 
2376   typedef boost::iterator_property_map<array_ref_type::iterator,
2377                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2378   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2379   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2380 
2381   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2382   PetscFunctionReturn(0);
2383 }
2384 #endif
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2388 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2389 {
2390   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2391   PetscErrorCode ierr;
2392   PetscInt       i,*idxb = 0;
2393   PetscScalar    *va,*vb;
2394   Vec            vtmp;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2398   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2399   if (idx) {
2400     for (i=0; i<A->rmap->n; i++) {
2401       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2402     }
2403   }
2404 
2405   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2406   if (idx) {
2407     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2408   }
2409   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2410   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2411 
2412   for (i=0; i<A->rmap->n; i++) {
2413     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2414       va[i] = vb[i];
2415       if (idx) idx[i] = a->garray[idxb[i]];
2416     }
2417   }
2418 
2419   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2420   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2421   ierr = PetscFree(idxb);CHKERRQ(ierr);
2422   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
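/*
   A sketch (A assumed assembled) of retrieving the largest absolute entry of each local
   row together with its global column index, as computed by the routine above:

     Vec      rmax;
     PetscInt m,*loc;
     ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/
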
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2428 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2429 {
2430   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2431   PetscErrorCode ierr;
2432   PetscInt       i,*idxb = 0;
2433   PetscScalar    *va,*vb;
2434   Vec            vtmp;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2438   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2439   if (idx) {
2440     for (i=0; i<A->rmap->n; i++) {
2441       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2442     }
2443   }
2444 
2445   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2446   if (idx) {
2447     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2448   }
2449   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2450   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2451 
2452   for (i=0; i<A->rmap->n; i++) {
2453     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2454       va[i] = vb[i];
2455       if (idx) idx[i] = a->garray[idxb[i]];
2456     }
2457   }
2458 
2459   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2460   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2461   ierr = PetscFree(idxb);CHKERRQ(ierr);
2462   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2468 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2469 {
2470   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2471   PetscInt       n      = A->rmap->n;
2472   PetscInt       cstart = A->cmap->rstart;
2473   PetscInt       *cmap  = mat->garray;
2474   PetscInt       *diagIdx, *offdiagIdx;
2475   Vec            diagV, offdiagV;
2476   PetscScalar    *a, *diagA, *offdiagA;
2477   PetscInt       r;
2478   PetscErrorCode ierr;
2479 
2480   PetscFunctionBegin;
2481   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2482   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2483   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2484   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2485   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2487   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2488   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2489   for (r = 0; r < n; ++r) {
2490     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2491       a[r]   = diagA[r];
2492       idx[r] = cstart + diagIdx[r];
2493     } else {
2494       a[r]   = offdiagA[r];
2495       idx[r] = cmap[offdiagIdx[r]];
2496     }
2497   }
2498   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2499   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2500   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2501   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2502   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2503   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2509 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2510 {
2511   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2512   PetscInt       n      = A->rmap->n;
2513   PetscInt       cstart = A->cmap->rstart;
2514   PetscInt       *cmap  = mat->garray;
2515   PetscInt       *diagIdx, *offdiagIdx;
2516   Vec            diagV, offdiagV;
2517   PetscScalar    *a, *diagA, *offdiagA;
2518   PetscInt       r;
2519   PetscErrorCode ierr;
2520 
2521   PetscFunctionBegin;
2522   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2523   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2524   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2525   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2526   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2527   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2528   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2529   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2530   for (r = 0; r < n; ++r) {
2531     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2532       a[r]   = diagA[r];
2533       idx[r] = cstart + diagIdx[r];
2534     } else {
2535       a[r]   = offdiagA[r];
2536       idx[r] = cmap[offdiagIdx[r]];
2537     }
2538   }
2539   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2540   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2541   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2542   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2543   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2544   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2550 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2551 {
2552   PetscErrorCode ierr;
2553   Mat            *dummy;
2554 
2555   PetscFunctionBegin;
2556   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2557   *newmat = *dummy;
2558   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2564 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2565 {
2566   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2576 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2577 {
2578   PetscErrorCode ierr;
2579   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2580 
2581   PetscFunctionBegin;
2582   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2583   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2584   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2585   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 /* -------------------------------------------------------------------*/
2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2591                                        MatGetRow_MPIAIJ,
2592                                        MatRestoreRow_MPIAIJ,
2593                                        MatMult_MPIAIJ,
2594                                 /* 4*/ MatMultAdd_MPIAIJ,
2595                                        MatMultTranspose_MPIAIJ,
2596                                        MatMultTransposeAdd_MPIAIJ,
2597 #if defined(PETSC_HAVE_PBGL)
2598                                        MatSolve_MPIAIJ,
2599 #else
2600                                        0,
2601 #endif
2602                                        0,
2603                                        0,
2604                                 /*10*/ 0,
2605                                        0,
2606                                        0,
2607                                        MatSOR_MPIAIJ,
2608                                        MatTranspose_MPIAIJ,
2609                                 /*15*/ MatGetInfo_MPIAIJ,
2610                                        MatEqual_MPIAIJ,
2611                                        MatGetDiagonal_MPIAIJ,
2612                                        MatDiagonalScale_MPIAIJ,
2613                                        MatNorm_MPIAIJ,
2614                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2615                                        MatAssemblyEnd_MPIAIJ,
2616                                        MatSetOption_MPIAIJ,
2617                                        MatZeroEntries_MPIAIJ,
2618                                 /*24*/ MatZeroRows_MPIAIJ,
2619                                        0,
2620 #if defined(PETSC_HAVE_PBGL)
2621                                        0,
2622 #else
2623                                        0,
2624 #endif
2625                                        0,
2626                                        0,
2627                                 /*29*/ MatSetUp_MPIAIJ,
2628 #if defined(PETSC_HAVE_PBGL)
2629                                        0,
2630 #else
2631                                        0,
2632 #endif
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*34*/ MatDuplicate_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*39*/ MatAXPY_MPIAIJ,
2642                                        MatGetSubMatrices_MPIAIJ,
2643                                        MatIncreaseOverlap_MPIAIJ,
2644                                        MatGetValues_MPIAIJ,
2645                                        MatCopy_MPIAIJ,
2646                                 /*44*/ MatGetRowMax_MPIAIJ,
2647                                        MatScale_MPIAIJ,
2648                                        0,
2649                                        MatDiagonalSet_MPIAIJ,
2650                                        MatZeroRowsColumns_MPIAIJ,
2651                                 /*49*/ MatSetRandom_MPIAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2657                                        0,
2658                                        MatSetUnfactored_MPIAIJ,
2659                                        MatPermute_MPIAIJ,
2660                                        0,
2661                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2662                                        MatDestroy_MPIAIJ,
2663                                        MatView_MPIAIJ,
2664                                        0,
2665                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2666                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2667                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2672                                        MatGetRowMinAbs_MPIAIJ,
2673                                        0,
2674                                        MatSetColoring_MPIAIJ,
2675                                        0,
2676                                        MatSetValuesAdifor_MPIAIJ,
2677                                 /*75*/ MatFDColoringApply_AIJ,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        MatFindZeroDiagonals_MPIAIJ,
2682                                 /*80*/ 0,
2683                                        0,
2684                                        0,
2685                                 /*83*/ MatLoad_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2692                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2693                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2694                                        MatPtAP_MPIAIJ_MPIAIJ,
2695                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2696                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                 /*99*/ 0,
2702                                        0,
2703                                        0,
2704                                        MatConjugate_MPIAIJ,
2705                                        0,
2706                                 /*104*/MatSetValuesRow_MPIAIJ,
2707                                        MatRealPart_MPIAIJ,
2708                                        MatImaginaryPart_MPIAIJ,
2709                                        0,
2710                                        0,
2711                                 /*109*/0,
2712                                        0,
2713                                        MatGetRowMin_MPIAIJ,
2714                                        0,
2715                                        0,
2716                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                 /*119*/0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        MatGetMultiProcBlock_MPIAIJ,
2726                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2727                                        MatGetColumnNorms_MPIAIJ,
2728                                        MatInvertBlockDiagonal_MPIAIJ,
2729                                        0,
2730                                        MatGetSubMatricesParallel_MPIAIJ,
2731                                 /*129*/0,
2732                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2733                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2734                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2735                                        0,
2736                                 /*134*/0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                 /*139*/0,
2742                                        0,
2743                                        0,
2744                                        MatFDColoringSetUp_MPIXAIJ,
2745                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2746                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2747 };
2748 
2749 /* ----------------------------------------------------------------------------------------*/
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2753 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2754 {
2755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2760   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2766 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2767 {
2768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2769   PetscErrorCode ierr;
2770 
2771   PetscFunctionBegin;
2772   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2773   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2779 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2780 {
2781   Mat_MPIAIJ     *b;
2782   PetscErrorCode ierr;
2783 
2784   PetscFunctionBegin;
2785   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2786   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2787   b = (Mat_MPIAIJ*)B->data;
2788 
2789   if (!B->preallocated) {
2790     /* Explicitly create 2 MATSEQAIJ matrices. */
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2793     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2794     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2795     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2796     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2797     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2798     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2799     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2800     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2801   }
2802 
2803   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2804   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2805   B->preallocated = PETSC_TRUE;
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 #undef __FUNCT__
2810 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2811 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2812 {
2813   Mat            mat;
2814   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2815   PetscErrorCode ierr;
2816 
2817   PetscFunctionBegin;
2818   *newmat = 0;
2819   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2820   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2821   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2822   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2823   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2824   a       = (Mat_MPIAIJ*)mat->data;
2825 
2826   mat->factortype   = matin->factortype;
2827   mat->assembled    = PETSC_TRUE;
2828   mat->insertmode   = NOT_SET_VALUES;
2829   mat->preallocated = PETSC_TRUE;
2830 
2831   a->size         = oldmat->size;
2832   a->rank         = oldmat->rank;
2833   a->donotstash   = oldmat->donotstash;
2834   a->roworiented  = oldmat->roworiented;
2835   a->rowindices   = 0;
2836   a->rowvalues    = 0;
2837   a->getrowactive = PETSC_FALSE;
2838 
2839   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2840   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2841 
2842   if (oldmat->colmap) {
2843 #if defined(PETSC_USE_CTABLE)
2844     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2845 #else
2846     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2847     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2848     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2849 #endif
2850   } else a->colmap = 0;
2851   if (oldmat->garray) {
2852     PetscInt len;
2853     len  = oldmat->B->cmap->n;
2854     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2855     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2856     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2857   } else a->garray = 0;
2858 
2859   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2860   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2861   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2862   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2863   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2864   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2865   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2866   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2867   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2868   *newmat = mat;
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatLoad_MPIAIJ"
2876 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2877 {
2878   PetscScalar    *vals,*svals;
2879   MPI_Comm       comm;
2880   PetscErrorCode ierr;
2881   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2882   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2883   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2884   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2885   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2886   int            fd;
2887   PetscInt       bs = newMat->rmap->bs;
2888 
2889   PetscFunctionBegin;
2890   /* force binary viewer to load .info file if it has not yet done so */
2891   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2892   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2893   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2894   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2895   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2896   if (!rank) {
2897     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2898     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2899   }
2900 
2901   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2902   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2903   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2904   if (bs < 0) bs = 1;
2905 
2906   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2907 
2908   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2909   M    = header[1]; N = header[2];
2910   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2911   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2912   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2913 
2914   /* If global sizes are set, check if they are consistent with that given in the file */
2915   if (sizesset) {
2916     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2917   }
2918   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2919   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2920 
2921   /* determine ownership of all (block) rows */
2922   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2923   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2924   else m = newMat->rmap->n; /* Set by user */
2925 
2926   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2927   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2928 
2929   /* First process needs enough room for process with most rows */
2930   if (!rank) {
2931     mmax = rowners[1];
2932     for (i=2; i<=size; i++) {
2933       mmax = PetscMax(mmax, rowners[i]);
2934     }
2935   } else mmax = -1;             /* unused, but compilers complain */
2936 
2937   rowners[0] = 0;
2938   for (i=2; i<=size; i++) {
2939     rowners[i] += rowners[i-1];
2940   }
2941   rstart = rowners[rank];
2942   rend   = rowners[rank+1];
2943 
2944   /* distribute row lengths to all processors */
2945   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2946   if (!rank) {
2947     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2948     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2949     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2950     for (j=0; j<m; j++) {
2951       procsnz[0] += ourlens[j];
2952     }
2953     for (i=1; i<size; i++) {
2954       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2955       /* calculate the number of nonzeros on each processor */
2956       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2957         procsnz[i] += rowlengths[j];
2958       }
2959       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2960     }
2961     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2962   } else {
2963     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2964   }
2965 
2966   if (!rank) {
2967     /* determine max buffer needed and allocate it */
2968     maxnz = 0;
2969     for (i=0; i<size; i++) {
2970       maxnz = PetscMax(maxnz,procsnz[i]);
2971     }
2972     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2973 
2974     /* read in my part of the matrix column indices  */
2975     nz   = procsnz[0];
2976     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2977     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2978 
2979     /* read in everyone else's and ship off */
2980     for (i=1; i<size; i++) {
2981       nz   = procsnz[i];
2982       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2983       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2984     }
2985     ierr = PetscFree(cols);CHKERRQ(ierr);
2986   } else {
2987     /* determine buffer space needed for message */
2988     nz = 0;
2989     for (i=0; i<m; i++) {
2990       nz += ourlens[i];
2991     }
2992     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2993 
2994     /* receive message of column indices*/
2995     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2996   }
2997 
2998   /* determine column ownership if matrix is not square */
2999   if (N != M) {
3000     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3001     else n = newMat->cmap->n;
3002     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3003     cstart = cend - n;
3004   } else {
3005     cstart = rstart;
3006     cend   = rend;
3007     n      = cend - cstart;
3008   }
3009 
3010   /* loop over local rows, determining number of off diagonal entries */
3011   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3012   jj   = 0;
3013   for (i=0; i<m; i++) {
3014     for (j=0; j<ourlens[i]; j++) {
3015       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3016       jj++;
3017     }
3018   }
3019 
3020   for (i=0; i<m; i++) {
3021     ourlens[i] -= offlens[i];
3022   }
3023   if (!sizesset) {
3024     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3025   }
3026 
3027   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3028 
3029   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3030 
3031   for (i=0; i<m; i++) {
3032     ourlens[i] += offlens[i];
3033   }
3034 
3035   if (!rank) {
3036     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3037 
3038     /* read in my part of the matrix numerical values  */
3039     nz   = procsnz[0];
3040     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3041 
3042     /* insert into matrix */
3043     jj      = rstart;
3044     smycols = mycols;
3045     svals   = vals;
3046     for (i=0; i<m; i++) {
3047       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3048       smycols += ourlens[i];
3049       svals   += ourlens[i];
3050       jj++;
3051     }
3052 
3053     /* read in other processors and ship out */
3054     for (i=1; i<size; i++) {
3055       nz   = procsnz[i];
3056       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3057       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3058     }
3059     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3060   } else {
3061     /* receive numeric values */
3062     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3063 
3064     /* receive message of values*/
3065     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3066 
3067     /* insert into matrix */
3068     jj      = rstart;
3069     smycols = mycols;
3070     svals   = vals;
3071     for (i=0; i<m; i++) {
3072       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3073       smycols += ourlens[i];
3074       svals   += ourlens[i];
3075       jj++;
3076     }
3077   }
3078   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3079   ierr = PetscFree(vals);CHKERRQ(ierr);
3080   ierr = PetscFree(mycols);CHKERRQ(ierr);
3081   ierr = PetscFree(rowners);CHKERRQ(ierr);
3082   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3083   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3089 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3090 {
3091   PetscErrorCode ierr;
3092   IS             iscol_local;
3093   PetscInt       csize;
3094 
3095   PetscFunctionBegin;
3096   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3097   if (call == MAT_REUSE_MATRIX) {
3098     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3099     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3100   } else {
3101     PetscInt cbs;
3102     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3103     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3104     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3105   }
3106   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3107   if (call == MAT_INITIAL_MATRIX) {
3108     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3109     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3110   }
3111   PetscFunctionReturn(0);
3112 }
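
/*
   Illustrative sketch of the calling pattern served by the routine above (the names mat,
   isrow, iscol and sub are assumptions, not taken from this file): a first call with
   MAT_INITIAL_MATRIX creates the submatrix and composes the gathered column IS on it, so
   a later call with MAT_REUSE_MATRIX can refresh the values after mat has changed numerically.

     Mat sub;

     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     (change the numerical values of mat, keeping its nonzero pattern)
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/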
3113 
3114 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3117 /*
3118     Not great since it makes two copies of the submatrix: first a SeqAIJ
3119   holding the local rows on each process, and then the final result formed by concatenating the local matrices.
3120   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3121 
3122   Note: This requires a sequential iscol with all indices.
3123 */
3124 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3125 {
3126   PetscErrorCode ierr;
3127   PetscMPIInt    rank,size;
3128   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3129   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3130   PetscBool      allcolumns, colflag;
3131   Mat            M,Mreuse;
3132   MatScalar      *vwork,*aa;
3133   MPI_Comm       comm;
3134   Mat_SeqAIJ     *aij;
3135 
3136   PetscFunctionBegin;
3137   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3138   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3139   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3140 
3141   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3142   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3143   if (colflag && ncol == mat->cmap->N) {
3144     allcolumns = PETSC_TRUE;
3145   } else {
3146     allcolumns = PETSC_FALSE;
3147   }
3148   if (call ==  MAT_REUSE_MATRIX) {
3149     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3150     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3151     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3152   } else {
3153     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3154   }
3155 
3156   /*
3157       m - number of local rows
3158       n - number of columns (same on all processors)
3159       rstart - first row in new global matrix generated
3160   */
3161   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3162   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3163   if (call == MAT_INITIAL_MATRIX) {
3164     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3165     ii  = aij->i;
3166     jj  = aij->j;
3167 
3168     /*
3169         Determine the number of non-zeros in the diagonal and off-diagonal
3170         portions of the matrix in order to do correct preallocation
3171     */
3172 
3173     /* first get start and end of "diagonal" columns */
3174     if (csize == PETSC_DECIDE) {
3175       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3176       if (mglobal == n) { /* square matrix */
3177         nlocal = m;
3178       } else {
3179         nlocal = n/size + ((n % size) > rank);
3180       }
3181     } else {
3182       nlocal = csize;
3183     }
3184     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3185     rstart = rend - nlocal;
3186     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3187 
3188     /* next, compute all the lengths */
3189     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3190     olens = dlens + m;
3191     for (i=0; i<m; i++) {
3192       jend = ii[i+1] - ii[i];
3193       olen = 0;
3194       dlen = 0;
3195       for (j=0; j<jend; j++) {
3196         if (*jj < rstart || *jj >= rend) olen++;
3197         else dlen++;
3198         jj++;
3199       }
3200       olens[i] = olen;
3201       dlens[i] = dlen;
3202     }
3203     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3204     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3205     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3206     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3207     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3208     ierr = PetscFree(dlens);CHKERRQ(ierr);
3209   } else {
3210     PetscInt ml,nl;
3211 
3212     M    = *newmat;
3213     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3214     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3215     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3216     /*
3217          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3218        rather than the slower MatSetValues().
3219     */
3220     M->was_assembled = PETSC_TRUE;
3221     M->assembled     = PETSC_FALSE;
3222   }
3223   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3224   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3225   ii   = aij->i;
3226   jj   = aij->j;
3227   aa   = aij->a;
3228   for (i=0; i<m; i++) {
3229     row   = rstart + i;
3230     nz    = ii[i+1] - ii[i];
3231     cwork = jj;     jj += nz;
3232     vwork = aa;     aa += nz;
3233     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3234   }
3235 
3236   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3237   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238   *newmat = M;
3239 
3240   /* save submatrix used in processor for next request */
3241   if (call ==  MAT_INITIAL_MATRIX) {
3242     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3243     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3244   }
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3250 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3251 {
3252   PetscInt       m,cstart, cend,j,nnz,i,d;
3253   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3254   const PetscInt *JJ;
3255   PetscScalar    *values;
3256   PetscErrorCode ierr;
3257 
3258   PetscFunctionBegin;
3259   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3260 
3261   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3262   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3263   m      = B->rmap->n;
3264   cstart = B->cmap->rstart;
3265   cend   = B->cmap->rend;
3266   rstart = B->rmap->rstart;
3267 
3268   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3269 
3270 #if defined(PETSC_USE_DEBUG)
3271   for (i=0; i<m; i++) {
3272     nnz = Ii[i+1]- Ii[i];
3273     JJ  = J + Ii[i];
3274     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3275     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3276     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3277   }
3278 #endif
3279 
3280   for (i=0; i<m; i++) {
3281     nnz     = Ii[i+1]- Ii[i];
3282     JJ      = J + Ii[i];
3283     nnz_max = PetscMax(nnz_max,nnz);
3284     d       = 0;
3285     for (j=0; j<nnz; j++) {
3286       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3287     }
3288     d_nnz[i] = d;
3289     o_nnz[i] = nnz - d;
3290   }
3291   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3292   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3293 
3294   if (v) values = (PetscScalar*)v;
3295   else {
3296     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3297   }
3298 
3299   for (i=0; i<m; i++) {
3300     ii   = i + rstart;
3301     nnz  = Ii[i+1]- Ii[i];
3302     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3303   }
3304   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3305   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3306 
3307   if (!v) {
3308     ierr = PetscFree(values);CHKERRQ(ierr);
3309   }
3310   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3311   PetscFunctionReturn(0);
3312 }
3313 
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3316 /*@
3317    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3318    (the default parallel PETSc format).
3319 
3320    Collective on MPI_Comm
3321 
3322    Input Parameters:
3323 +  B - the matrix
3324 .  i - the indices into j for the start of each local row (starts with zero)
3325 .  j - the column indices for each local row (starts with zero)
3326 -  v - optional values in the matrix
3327 
3328    Level: developer
3329 
3330    Notes:
3331        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3332      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3333      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3334 
3335        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3336 
3337        The format which is used for the sparse matrix input, is equivalent to a
3338        The format which is used for the sparse matrix input is equivalent to a
3339     row-major ordering, i.e., for the following matrix, the input data expected is
3340 
3341         1 0 0
3342         2 0 3     P0
3343        -------
3344         4 5 6     P1
3345 
3346      Process0 [P0]: rows_owned=[0,1]
3347         i =  {0,1,3}  [size = nrow+1  = 2+1]
3348         j =  {0,0,2}  [size = nz = 3]
3349         v =  {1,2,3}  [size = nz = 3]
3350 
3351      Process1 [P1]: rows_owned=[2]
3352         i =  {0,3}    [size = nrow+1  = 1+1]
3353         j =  {0,1,2}  [size = nz = 3]
3354         v =  {4,5,6}  [size = nz = 3]
3355 
3356 .keywords: matrix, aij, compressed row, sparse, parallel
3357 
3358 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3359           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3360 @*/
3361 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3362 {
3363   PetscErrorCode ierr;
3364 
3365   PetscFunctionBegin;
3366   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3367   PetscFunctionReturn(0);
3368 }
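
/*
   Illustrative sketch (assumes PETSC_COMM_WORLD with exactly two processes and reuses the
   small 3x3 example from the manual page above; B, rank, ierr and the ii/jj/vv arrays are
   made-up names): every process calls MatMPIAIJSetPreallocationCSR() with the CSR arrays of
   the rows it owns, and the routine both preallocates and fills the matrix.

     Mat         B;
     PetscMPIInt rank;
     PetscInt    ii0[] = {0,1,3}, jj0[] = {0,0,2};
     PetscScalar vv0[] = {1.0,2.0,3.0};
     PetscInt    ii1[] = {0,3},   jj1[] = {0,1,2};
     PetscScalar vv1[] = {4.0,5.0,6.0};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,ii0,jj0,vv0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(B,ii1,jj1,vv1);CHKERRQ(ierr);}
*/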
3369 
3370 #undef __FUNCT__
3371 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3372 /*@C
3373    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3374    (the default parallel PETSc format).  For good matrix assembly performance
3375    the user should preallocate the matrix storage by setting the parameters
3376    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3377    performance can be increased by more than a factor of 50.
3378 
3379    Collective on MPI_Comm
3380 
3381    Input Parameters:
3382 +  B - the matrix
3383 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3384            (same value is used for all local rows)
3385 .  d_nnz - array containing the number of nonzeros in the various rows of the
3386            DIAGONAL portion of the local submatrix (possibly different for each row)
3387            or NULL, if d_nz is used to specify the nonzero structure.
3388            The size of this array is equal to the number of local rows, i.e 'm'.
3389            For matrices that will be factored, you must leave room for (and set)
3390            the diagonal entry even if it is zero.
3391 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3392            submatrix (same value is used for all local rows).
3393 -  o_nnz - array containing the number of nonzeros in the various rows of the
3394            OFF-DIAGONAL portion of the local submatrix (possibly different for
3395            each row) or NULL, if o_nz is used to specify the nonzero
3396            structure. The size of this array is equal to the number
3397            of local rows, i.e 'm'.
3398 
3399    If the *_nnz parameter is given then the *_nz parameter is ignored
3400 
3401    The AIJ format (also called the Yale sparse matrix format or
3402    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3403    storage.  The stored row and column indices begin with zero.
3404    See Users-Manual: ch_mat for details.
3405 
3406    The parallel matrix is partitioned such that the first m0 rows belong to
3407    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3408    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3409 
3410    The DIAGONAL portion of the local submatrix of a processor can be defined
3411    as the submatrix which is obtained by extracting the part corresponding to
3412    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3413    first row that belongs to the processor, r2 is the last row belonging to
3414    this processor, and c1-c2 is the range of indices of the local part of a
3415    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3416    common case of a square matrix, the row and column ranges are the same and
3417    the DIAGONAL part is also square. The remaining portion of the local
3418    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3419 
3420    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3421 
3422    You can call MatGetInfo() to get information on how effective the preallocation was;
3423    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3424    You can also run with the option -info and look for messages with the string
3425    malloc in them to see if additional memory allocation was needed.
3426 
3427    Example usage:
3428 
3429    Consider the following 8x8 matrix with 34 non-zero values that is
3430    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3431    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3432    as follows:
3433 
3434 .vb
3435             1  2  0  |  0  3  0  |  0  4
3436     Proc0   0  5  6  |  7  0  0  |  8  0
3437             9  0 10  | 11  0  0  | 12  0
3438     -------------------------------------
3439            13  0 14  | 15 16 17  |  0  0
3440     Proc1   0 18  0  | 19 20 21  |  0  0
3441             0  0  0  | 22 23  0  | 24  0
3442     -------------------------------------
3443     Proc2  25 26 27  |  0  0 28  | 29  0
3444            30  0  0  | 31 32 33  |  0 34
3445 .ve
3446 
3447    This can be represented as a collection of submatrices as:
3448 
3449 .vb
3450       A B C
3451       D E F
3452       G H I
3453 .ve
3454 
3455    Where the submatrices A,B,C are owned by proc0, D,E,F are
3456    owned by proc1, G,H,I are owned by proc2.
3457 
3458    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3459    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3460    The 'M','N' parameters are 8,8, and have the same values on all procs.
3461 
3462    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3463    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3464    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3465    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3466    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3467    matrix, and [DF] as another SeqAIJ matrix.
3468 
3469    When d_nz, o_nz parameters are specified, d_nz storage elements are
3470    allocated for every row of the local diagonal submatrix, and o_nz
3471    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3472    One way to choose d_nz and o_nz is to use the max nonzeros per local
3473    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3474    In this case, the values of d_nz,o_nz are:
3475 .vb
3476      proc0 : dnz = 2, o_nz = 2
3477      proc1 : dnz = 3, o_nz = 2
3478      proc2 : dnz = 1, o_nz = 4
3479 .ve
3480    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3481    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3482    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3483    34 values.
3484 
3485    When d_nnz, o_nnz parameters are specified, the storage is specified
3486    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3487    In the above case the values for d_nnz,o_nnz are:
3488 .vb
3489      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3490      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3491      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3492 .ve
3493    Here the space allocated is the sum of all the above values, i.e., 34, and
3494    hence pre-allocation is perfect.
3495 
3496    Level: intermediate
3497 
3498 .keywords: matrix, aij, compressed row, sparse, parallel
3499 
3500 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3501           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3502 @*/
3503 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3504 {
3505   PetscErrorCode ierr;
3506 
3507   PetscFunctionBegin;
3508   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3509   PetscValidType(B,1);
3510   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3511   PetscFunctionReturn(0);
3512 }
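
/*
   Illustrative sketch (hypothetical; A and ierr are not defined in this file): the creation
   sequence this routine is meant to be used in, here with the scalar d_nz/o_nz form and the
   worst-case per-row counts from the 8x8 example above (3 nonzeros in the diagonal block,
   4 in the off-diagonal block).  When exact per-row counts are known, pass them as the
   d_nnz/o_nnz arrays instead and the scalar arguments are ignored.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,4,NULL);CHKERRQ(ierr);
     (MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() follow as usual)
*/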
3513 
3514 #undef __FUNCT__
3515 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3516 /*@
3517      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3518          CSR format the local rows.
3519 
3520    Collective on MPI_Comm
3521 
3522    Input Parameters:
3523 +  comm - MPI communicator
3524 .  m - number of local rows (Cannot be PETSC_DECIDE)
3525 .  n - This value should be the same as the local size used in creating the
3526        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3527        calculated if N is given) For square matrices n is almost always m.
3528 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3529 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3530 .   i - row indices
3531 .   j - column indices
3532 -   a - matrix values
3533 
3534    Output Parameter:
3535 .   mat - the matrix
3536 
3537    Level: intermediate
3538 
3539    Notes:
3540        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3541      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3542      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3543 
3544        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3545 
3546        The format which is used for the sparse matrix input is equivalent to a
3547     row-major ordering, i.e., for the following matrix, the input data expected is
3548     as shown:
3549 
3550         1 0 0
3551         2 0 3     P0
3552        -------
3553         4 5 6     P1
3554 
3555      Process0 [P0]: rows_owned=[0,1]
3556         i =  {0,1,3}  [size = nrow+1  = 2+1]
3557         j =  {0,0,2}  [size = nz = 3]
3558         v =  {1,2,3}  [size = nz = 3]
3559 
3560      Process1 [P1]: rows_owned=[2]
3561         i =  {0,3}    [size = nrow+1  = 1+1]
3562         j =  {0,1,2}  [size = nz = 3]
3563         v =  {4,5,6}  [size = nz = 3]
3564 
3565 .keywords: matrix, aij, compressed row, sparse, parallel
3566 
3567 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3568           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3569 @*/
3570 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3571 {
3572   PetscErrorCode ierr;
3573 
3574   PetscFunctionBegin;
3575   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3576   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3577   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3578   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3579   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3580   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3581   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3582   PetscFunctionReturn(0);
3583 }
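
/*
   Illustrative sketch (again assuming two processes and the 3x3 example from the manual page
   above; C, rank, ierr and the ii/jj/vv arrays are made-up names): the one-call variant in
   which each process hands in the CSR arrays for its own rows and gets back the assembled
   parallel matrix.

     Mat         C;
     PetscMPIInt rank;
     PetscInt    ii0[] = {0,1,3}, jj0[] = {0,0,2};
     PetscScalar vv0[] = {1.0,2.0,3.0};
     PetscInt    ii1[] = {0,3},   jj1[] = {0,1,2};
     PetscScalar vv1[] = {4.0,5.0,6.0};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,ii0,jj0,vv0,&C);CHKERRQ(ierr);}
     else       {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,ii1,jj1,vv1,&C);CHKERRQ(ierr);}
*/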
3584 
3585 #undef __FUNCT__
3586 #define __FUNCT__ "MatCreateAIJ"
3587 /*@C
3588    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3589    (the default parallel PETSc format).  For good matrix assembly performance
3590    the user should preallocate the matrix storage by setting the parameters
3591    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3592    performance can be increased by more than a factor of 50.
3593 
3594    Collective on MPI_Comm
3595 
3596    Input Parameters:
3597 +  comm - MPI communicator
3598 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3599            This value should be the same as the local size used in creating the
3600            y vector for the matrix-vector product y = Ax.
3601 .  n - This value should be the same as the local size used in creating the
3602        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3603        calculated if N is given) For square matrices n is almost always m.
3604 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3605 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3606 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3607            (same value is used for all local rows)
3608 .  d_nnz - array containing the number of nonzeros in the various rows of the
3609            DIAGONAL portion of the local submatrix (possibly different for each row)
3610            or NULL, if d_nz is used to specify the nonzero structure.
3611            The size of this array is equal to the number of local rows, i.e 'm'.
3612 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3613            submatrix (same value is used for all local rows).
3614 -  o_nnz - array containing the number of nonzeros in the various rows of the
3615            OFF-DIAGONAL portion of the local submatrix (possibly different for
3616            each row) or NULL, if o_nz is used to specify the nonzero
3617            structure. The size of this array is equal to the number
3618            of local rows, i.e 'm'.
3619 
3620    Output Parameter:
3621 .  A - the matrix
3622 
3623    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3624    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3625    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3626 
3627    Notes:
3628    If the *_nnz parameter is given then the *_nz parameter is ignored
3629 
3630    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3631    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3632    storage requirements for this matrix.
3633 
3634    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3635    processor then it must be used on all processors that share the object for
3636    that argument.
3637 
3638    The user MUST specify either the local or global matrix dimensions
3639    (possibly both).
3640 
3641    The parallel matrix is partitioned across processors such that the
3642    first m0 rows belong to process 0, the next m1 rows belong to
3643    process 1, the next m2 rows belong to process 2 etc.. where
3644    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3645    values corresponding to an [m x N] submatrix.
3646 
3647    The columns are logically partitioned with the n0 columns belonging
3648    to 0th partition, the next n1 columns belonging to the next
3649    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3650 
3651    The DIAGONAL portion of the local submatrix on any given processor
3652    is the submatrix corresponding to the rows and columns m,n
3653    corresponding to the given processor. i.e diagonal matrix on
3654    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3655    etc. The remaining portion of the local submatrix [m x (N-n)]
3656    constitute the OFF-DIAGONAL portion. The example below better
3657    illustrates this concept.
3658 
3659    For a square global matrix we define each processor's diagonal portion
3660    to be its local rows and the corresponding columns (a square submatrix);
3661    each processor's off-diagonal portion encompasses the remainder of the
3662    local matrix (a rectangular submatrix).
3663 
3664    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3665 
3666    When calling this routine with a single process communicator, a matrix of
3667    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3668    type of communicator, use the construction mechanism:
3669      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3670 
3671    By default, this format uses inodes (identical nodes) when possible.
3672    We search for consecutive rows with the same nonzero structure, thereby
3673    reusing matrix information to achieve increased efficiency.
3674 
3675    Options Database Keys:
3676 +  -mat_no_inode  - Do not use inodes
3677 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3678 -  -mat_aij_oneindex - Internally use indexing starting at 1
3679         rather than 0.  Note that when calling MatSetValues(),
3680         the user still MUST index entries starting at 0!
3681 
3682 
3683    Example usage:
3684 
3685    Consider the following 8x8 matrix with 34 non-zero values that is
3686    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3687    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3688    as follows:
3689 
3690 .vb
3691             1  2  0  |  0  3  0  |  0  4
3692     Proc0   0  5  6  |  7  0  0  |  8  0
3693             9  0 10  | 11  0  0  | 12  0
3694     -------------------------------------
3695            13  0 14  | 15 16 17  |  0  0
3696     Proc1   0 18  0  | 19 20 21  |  0  0
3697             0  0  0  | 22 23  0  | 24  0
3698     -------------------------------------
3699     Proc2  25 26 27  |  0  0 28  | 29  0
3700            30  0  0  | 31 32 33  |  0 34
3701 .ve
3702 
3703    This can be represented as a collection of submatrices as:
3704 
3705 .vb
3706       A B C
3707       D E F
3708       G H I
3709 .ve
3710 
3711    Where the submatrices A,B,C are owned by proc0, D,E,F are
3712    owned by proc1, G,H,I are owned by proc2.
3713 
3714    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3715    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3716    The 'M','N' parameters are 8,8, and have the same values on all procs.
3717 
3718    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3719    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3720    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3721    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3722    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3723    matrix, and [DF] as another SeqAIJ matrix.
3724 
3725    When d_nz, o_nz parameters are specified, d_nz storage elements are
3726    allocated for every row of the local diagonal submatrix, and o_nz
3727    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3728    One way to choose d_nz and o_nz is to use the max nonzeros per local
3729    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3730    In this case, the values of d_nz,o_nz are:
3731 .vb
3732      proc0 : dnz = 2, o_nz = 2
3733      proc1 : dnz = 3, o_nz = 2
3734      proc2 : dnz = 1, o_nz = 4
3735 .ve
3736    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3737    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3738    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3739    34 values.
3740 
3741    When d_nnz, o_nnz parameters are specified, the storage is specified
3742    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3743    In the above case the values for d_nnz,o_nnz are:
3744 .vb
3745      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3746      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3747      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3748 .ve
3749    Here the space allocated is the sum of all the above values, i.e., 34, and
3750    hence pre-allocation is perfect.
3751 
3752    Level: intermediate
3753 
3754 .keywords: matrix, aij, compressed row, sparse, parallel
3755 
3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3757           MPIAIJ, MatCreateMPIAIJWithArrays()
3758 @*/
3759 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3760 {
3761   PetscErrorCode ierr;
3762   PetscMPIInt    size;
3763 
3764   PetscFunctionBegin;
3765   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3766   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3767   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3768   if (size > 1) {
3769     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3770     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3771   } else {
3772     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3773     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3774   }
3775   PetscFunctionReturn(0);
3776 }
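
/*
   Illustrative sketch (a hypothetical 1D Laplacian-style assembly; A, Istart, Iend and ierr
   are assumptions): MatCreateAIJ() with the scalar preallocation form followed by row-by-row
   insertion.  Each row has at most 3 nonzeros, and at most 2 of them can fall outside the
   diagonal block (when a process owns a single row), hence d_nz = 3 and o_nz = 2.

     Mat         A;
     PetscInt    i,Istart,Iend,N = 100;
     PetscScalar v[3] = {-1.0,2.0,-1.0};

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
     for (i=Istart; i<Iend; i++) {
       PetscInt cols[3];
       cols[0] = i-1; cols[1] = i; cols[2] = i+1;
       if (i == 0)        {ierr = MatSetValues(A,1,&i,2,cols+1,v+1,INSERT_VALUES);CHKERRQ(ierr);}
       else if (i == N-1) {ierr = MatSetValues(A,1,&i,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);}
       else               {ierr = MatSetValues(A,1,&i,3,cols,v,INSERT_VALUES);CHKERRQ(ierr);}
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/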
3777 
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3780 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3781 {
3782   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3783 
3784   PetscFunctionBegin;
3785   if (Ad)     *Ad     = a->A;
3786   if (Ao)     *Ao     = a->B;
3787   if (colmap) *colmap = a->garray;
3788   PetscFunctionReturn(0);
3789 }
3790 
3791 #undef __FUNCT__
3792 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3793 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3794 {
3795   PetscErrorCode ierr;
3796   PetscInt       i;
3797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3798 
3799   PetscFunctionBegin;
3800   if (coloring->ctype == IS_COLORING_GLOBAL) {
3801     ISColoringValue *allcolors,*colors;
3802     ISColoring      ocoloring;
3803 
3804     /* set coloring for diagonal portion */
3805     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3806 
3807     /* set coloring for off-diagonal portion */
3808     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3809     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3810     for (i=0; i<a->B->cmap->n; i++) {
3811       colors[i] = allcolors[a->garray[i]];
3812     }
3813     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3814     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3815     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3816     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3817   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3818     ISColoringValue *colors;
3819     PetscInt        *larray;
3820     ISColoring      ocoloring;
3821 
3822     /* set coloring for diagonal portion */
3823     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3824     for (i=0; i<a->A->cmap->n; i++) {
3825       larray[i] = i + A->cmap->rstart;
3826     }
3827     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3828     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3829     for (i=0; i<a->A->cmap->n; i++) {
3830       colors[i] = coloring->colors[larray[i]];
3831     }
3832     ierr = PetscFree(larray);CHKERRQ(ierr);
3833     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3834     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3835     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3836 
3837     /* set coloring for off-diagonal portion */
3838     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3839     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3840     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3841     for (i=0; i<a->B->cmap->n; i++) {
3842       colors[i] = coloring->colors[larray[i]];
3843     }
3844     ierr = PetscFree(larray);CHKERRQ(ierr);
3845     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3846     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3847     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3848   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3849   PetscFunctionReturn(0);
3850 }
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3854 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3855 {
3856   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3857   PetscErrorCode ierr;
3858 
3859   PetscFunctionBegin;
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3861   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 #undef __FUNCT__
3866 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
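/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the local SeqAIJ matrices of all processes (in rank
   order) into one parallel MPIAIJ matrix. With MAT_INITIAL_MATRIX the nonzero structure is determined
   and the output matrix is created and preallocated; otherwise only the numerical values are inserted.
*/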
3867 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3868 {
3869   PetscErrorCode ierr;
3870   PetscInt       m,N,i,rstart,nnz,Ii;
3871   PetscInt       *indx;
3872   PetscScalar    *values;
3873 
3874   PetscFunctionBegin;
3875   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3876   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3877     PetscInt       *dnz,*onz,sum,bs,cbs;
3878 
3879     if (n == PETSC_DECIDE) {
3880       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3881     }
3882     /* Check sum(n) = N */
3883     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3884     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3885 
3886     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3887     rstart -= m;
3888 
3889     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3890     for (i=0; i<m; i++) {
3891       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3892       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3893       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3894     }
3895 
3896     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3897     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3898     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3899     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3900     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3901     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3902     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3903   }
3904 
3905   /* numeric phase */
3906   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3907   for (i=0; i<m; i++) {
3908     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3909     Ii   = i + rstart;
3910     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3911     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3912   }
3913   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3914   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatFileSplit"
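/*
   MatFileSplit - writes the local rows of the parallel matrix A, as a sequential matrix, to the
   binary file "<outfile>.<rank>" on each process.
*/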
3920 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3921 {
3922   PetscErrorCode    ierr;
3923   PetscMPIInt       rank;
3924   PetscInt          m,N,i,rstart,nnz;
3925   size_t            len;
3926   const PetscInt    *indx;
3927   PetscViewer       out;
3928   char              *name;
3929   Mat               B;
3930   const PetscScalar *values;
3931 
3932   PetscFunctionBegin;
3933   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3934   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3935   /* Should this be the type of the diagonal block of A? */
3936   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3937   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3938   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3939   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3940   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3941   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3942   for (i=0; i<m; i++) {
3943     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3944     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3945     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3946   }
3947   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3948   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3949 
3950   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3951   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3952   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3953   sprintf(name,"%s.%d",outfile,rank);
3954   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3955   ierr = PetscFree(name);CHKERRQ(ierr);
3956   ierr = MatView(B,out);CHKERRQ(ierr);
3957   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3958   ierr = MatDestroy(&B);CHKERRQ(ierr);
3959   PetscFunctionReturn(0);
3960 }
3961 
3962 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3963 #undef __FUNCT__
3964 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3965 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3966 {
3967   PetscErrorCode      ierr;
3968   Mat_Merge_SeqsToMPI *merge;
3969   PetscContainer      container;
3970 
3971   PetscFunctionBegin;
3972   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3973   if (container) {
3974     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3975     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3976     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3977     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3978     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3979     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3980     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3981     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3982     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3983     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3984     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3987     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3988     ierr = PetscFree(merge);CHKERRQ(ierr);
3989     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3990   }
3991   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3992   PetscFunctionReturn(0);
3993 }
3994 
3995 #include <../src/mat/utils/freespace.h>
3996 #include <petscbt.h>
3997 
3998 #undef __FUNCT__
3999 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
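/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ(): each process sends the
   values of the rows of its sequential matrix that are owned by other processes, then accumulates the
   received values together with its own local values into the already created parallel matrix mpimat.
*/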
4000 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4001 {
4002   PetscErrorCode      ierr;
4003   MPI_Comm            comm;
4004   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4005   PetscMPIInt         size,rank,taga,*len_s;
4006   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4007   PetscInt            proc,m;
4008   PetscInt            **buf_ri,**buf_rj;
4009   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4010   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4011   MPI_Request         *s_waits,*r_waits;
4012   MPI_Status          *status;
4013   MatScalar           *aa=a->a;
4014   MatScalar           **abuf_r,*ba_i;
4015   Mat_Merge_SeqsToMPI *merge;
4016   PetscContainer      container;
4017 
4018   PetscFunctionBegin;
4019   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4020   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4021 
4022   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4023   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4024 
4025   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4026   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4027 
4028   bi     = merge->bi;
4029   bj     = merge->bj;
4030   buf_ri = merge->buf_ri;
4031   buf_rj = merge->buf_rj;
4032 
4033   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4034   owners = merge->rowmap->range;
4035   len_s  = merge->len_s;
4036 
4037   /* send and recv matrix values */
4038   /*-----------------------------*/
4039   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4040   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4041 
4042   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4043   for (proc=0,k=0; proc<size; proc++) {
4044     if (!len_s[proc]) continue;
4045     i    = owners[proc];
4046     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4047     k++;
4048   }
4049 
4050   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4051   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4052   ierr = PetscFree(status);CHKERRQ(ierr);
4053 
4054   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4055   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4056 
4057   /* insert mat values of mpimat */
4058   /*----------------------------*/
4059   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4060   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4061 
4062   for (k=0; k<merge->nrecv; k++) {
4063     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4064     nrows       = *(buf_ri_k[k]);
4065     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4066     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4067   }
4068 
4069   /* set values of ba */
4070   m = merge->rowmap->n;
4071   for (i=0; i<m; i++) {
4072     arow = owners[rank] + i;
4073     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4074     bnzi = bi[i+1] - bi[i];
4075     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4076 
4077     /* add local non-zero vals of this proc's seqmat into ba */
4078     anzi   = ai[arow+1] - ai[arow];
4079     aj     = a->j + ai[arow];
4080     aa     = a->a + ai[arow];
4081     nextaj = 0;
4082     for (j=0; nextaj<anzi; j++) {
4083       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4084         ba_i[j] += aa[nextaj++];
4085       }
4086     }
4087 
4088     /* add received vals into ba */
4089     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4090       /* i-th row */
4091       if (i == *nextrow[k]) {
4092         anzi   = *(nextai[k]+1) - *nextai[k];
4093         aj     = buf_rj[k] + *(nextai[k]);
4094         aa     = abuf_r[k] + *(nextai[k]);
4095         nextaj = 0;
4096         for (j=0; nextaj<anzi; j++) {
4097           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4098             ba_i[j] += aa[nextaj++];
4099           }
4100         }
4101         nextrow[k]++; nextai[k]++;
4102       }
4103     }
4104     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4105   }
4106   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4107   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4108 
4109   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4110   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4111   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4112   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4113   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4114   PetscFunctionReturn(0);
4115 }
4116 
4117 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4118 
4119 #undef __FUNCT__
4120 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
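/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ(): exchanges the nonzero
   structure of the off-process rows of the sequential matrices, merges it with the local structure,
   creates and preallocates the parallel matrix, and attaches the merge information to it for reuse by
   the numeric phase.
*/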
4121 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4122 {
4123   PetscErrorCode      ierr;
4124   Mat                 B_mpi;
4125   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4126   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4127   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4128   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4129   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4130   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4131   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4132   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4133   MPI_Status          *status;
4134   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4135   PetscBT             lnkbt;
4136   Mat_Merge_SeqsToMPI *merge;
4137   PetscContainer      container;
4138 
4139   PetscFunctionBegin;
4140   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4141 
4142   /* make sure it is a PETSc comm */
4143   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4144   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4145   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4146 
4147   ierr = PetscNew(&merge);CHKERRQ(ierr);
4148   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4149 
4150   /* determine row ownership */
4151   /*---------------------------------------------------------*/
4152   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4153   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4154   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4155   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4156   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4157   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4158   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4159 
4160   m      = merge->rowmap->n;
4161   owners = merge->rowmap->range;
4162 
4163   /* determine the number of messages to send, their lengths */
4164   /*---------------------------------------------------------*/
4165   len_s = merge->len_s;
4166 
4167   len          = 0; /* length of buf_si[] */
4168   merge->nsend = 0;
4169   for (proc=0; proc<size; proc++) {
4170     len_si[proc] = 0;
4171     if (proc == rank) {
4172       len_s[proc] = 0;
4173     } else {
4174       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4175       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4176     }
4177     if (len_s[proc]) {
4178       merge->nsend++;
4179       nrows = 0;
4180       for (i=owners[proc]; i<owners[proc+1]; i++) {
4181         if (ai[i+1] > ai[i]) nrows++;
4182       }
4183       len_si[proc] = 2*(nrows+1);
4184       len         += len_si[proc];
4185     }
4186   }
4187 
4188   /* determine the number and length of messages to receive for ij-structure */
4189   /*-------------------------------------------------------------------------*/
4190   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4191   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4192 
4193   /* post the Irecv of j-structure */
4194   /*-------------------------------*/
4195   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4196   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4197 
4198   /* post the Isend of j-structure */
4199   /*--------------------------------*/
4200   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4201 
4202   for (proc=0, k=0; proc<size; proc++) {
4203     if (!len_s[proc]) continue;
4204     i    = owners[proc];
4205     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4206     k++;
4207   }
4208 
4209   /* receives and sends of j-structure are complete */
4210   /*------------------------------------------------*/
4211   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4212   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4213 
4214   /* send and recv i-structure */
4215   /*---------------------------*/
4216   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4217   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4218 
4219   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4220   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4221   for (proc=0,k=0; proc<size; proc++) {
4222     if (!len_s[proc]) continue;
4223     /* form outgoing message for i-structure:
4224          buf_si[0]:                 nrows to be sent
4225                [1:nrows]:           row index (global)
4226                [nrows+1:2*nrows+1]: i-structure index
4227     */
4228     /*-------------------------------------------*/
4229     nrows       = len_si[proc]/2 - 1;
4230     buf_si_i    = buf_si + nrows+1;
4231     buf_si[0]   = nrows;
4232     buf_si_i[0] = 0;
4233     nrows       = 0;
4234     for (i=owners[proc]; i<owners[proc+1]; i++) {
4235       anzi = ai[i+1] - ai[i];
4236       if (anzi) {
4237         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4238         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4239         nrows++;
4240       }
4241     }
4242     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4243     k++;
4244     buf_si += len_si[proc];
4245   }
4246 
4247   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4248   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4249 
4250   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4251   for (i=0; i<merge->nrecv; i++) {
4252     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4253   }
4254 
4255   ierr = PetscFree(len_si);CHKERRQ(ierr);
4256   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4257   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4258   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4259   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4260   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4261   ierr = PetscFree(status);CHKERRQ(ierr);
4262 
4263   /* compute a local seq matrix in each processor */
4264   /*----------------------------------------------*/
4265   /* allocate bi array and free space for accumulating nonzero column info */
4266   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4267   bi[0] = 0;
4268 
4269   /* create and initialize a linked list */
4270   nlnk = N+1;
4271   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4272 
4273   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4274   len  = ai[owners[rank+1]] - ai[owners[rank]];
4275   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4276 
4277   current_space = free_space;
4278 
4279   /* determine symbolic info for each local row */
4280   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4281 
4282   for (k=0; k<merge->nrecv; k++) {
4283     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4284     nrows       = *buf_ri_k[k];
4285     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4286     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4287   }
4288 
4289   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4290   len  = 0;
4291   for (i=0; i<m; i++) {
4292     bnzi = 0;
4293     /* add local non-zero cols of this proc's seqmat into lnk */
4294     arow  = owners[rank] + i;
4295     anzi  = ai[arow+1] - ai[arow];
4296     aj    = a->j + ai[arow];
4297     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4298     bnzi += nlnk;
4299     /* add received col data into lnk */
4300     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4301       if (i == *nextrow[k]) { /* i-th row */
4302         anzi  = *(nextai[k]+1) - *nextai[k];
4303         aj    = buf_rj[k] + *nextai[k];
4304         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4305         bnzi += nlnk;
4306         nextrow[k]++; nextai[k]++;
4307       }
4308     }
4309     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4310 
4311     /* if free space is not available, make more free space */
4312     if (current_space->local_remaining<bnzi) {
4313       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4314       nspacedouble++;
4315     }
4316     /* copy data into free space, then initialize lnk */
4317     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4318     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4319 
4320     current_space->array           += bnzi;
4321     current_space->local_used      += bnzi;
4322     current_space->local_remaining -= bnzi;
4323 
4324     bi[i+1] = bi[i] + bnzi;
4325   }
4326 
4327   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4328 
4329   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4330   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4331   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4332 
4333   /* create symbolic parallel matrix B_mpi */
4334   /*---------------------------------------*/
4335   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4336   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4337   if (n==PETSC_DECIDE) {
4338     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4339   } else {
4340     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4341   }
4342   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4343   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4344   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4345   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4346   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4347 
4348   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4349   B_mpi->assembled    = PETSC_FALSE;
4350   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4351   merge->bi           = bi;
4352   merge->bj           = bj;
4353   merge->buf_ri       = buf_ri;
4354   merge->buf_rj       = buf_rj;
4355   merge->coi          = NULL;
4356   merge->coj          = NULL;
4357   merge->owners_co    = NULL;
4358 
4359   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4360 
4361   /* attach the supporting struct to B_mpi for reuse */
4362   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4363   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4364   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4365   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4366   *mpimat = B_mpi;
4367 
4368   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 #undef __FUNCT__
4373 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4374 /*@C
4375       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4376                  matrices from each processor
4377 
4378     Collective on MPI_Comm
4379 
4380    Input Parameters:
4381 +    comm - the communicator the parallel matrix will live on
4382 .    seqmat - the input sequential matrix (one per process)
4383 .    m - number of local rows (or PETSC_DECIDE)
4384 .    n - number of local columns (or PETSC_DECIDE)
4385 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4386 
4387    Output Parameter:
4388 .    mpimat - the parallel matrix generated
4389 
4390     Level: advanced
4391 
4392    Notes:
4393      The dimensions of the sequential matrix in each processor MUST be the same.
4394      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4395      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
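
   Example usage (a sketch; seqmat is assumed to be the M-by-N SeqAIJ matrix assembled on each process):
.vb
     Mat A;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     /* ... change only the numerical values of seqmat ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
.ve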
4396 @*/
4397 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4398 {
4399   PetscErrorCode ierr;
4400   PetscMPIInt    size;
4401 
4402   PetscFunctionBegin;
4403   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4404   if (size == 1) {
4405     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4406     if (scall == MAT_INITIAL_MATRIX) {
4407       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4408     } else {
4409       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4410     }
4411     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4412     PetscFunctionReturn(0);
4413   }
4414   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4415   if (scall == MAT_INITIAL_MATRIX) {
4416     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4417   }
4418   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4419   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 #undef __FUNCT__
4424 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4425 /*@
4426      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4427           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4428           with MatGetSize()
4429 
4430     Not Collective
4431 
4432    Input Parameters:
4433 +    A - the matrix
4434 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4435 
4436    Output Parameter:
4437 .    A_loc - the local sequential matrix generated
4438 
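   Example usage (a sketch; A is assumed to be an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc; after the values of A change it can be refreshed with ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
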
4439     Level: developer
4440 
4441 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4442 
4443 @*/
4444 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4445 {
4446   PetscErrorCode ierr;
4447   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4448   Mat_SeqAIJ     *mat,*a,*b;
4449   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4450   MatScalar      *aa,*ba,*cam;
4451   PetscScalar    *ca;
4452   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4453   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4454   PetscBool      match;
4455   MPI_Comm       comm;
4456   PetscMPIInt    size;
4457 
4458   PetscFunctionBegin;
4459   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4460   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4461   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4462   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4463   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4464 
4465   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4466   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4467   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4468   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4469   aa = a->a; ba = b->a;
4470   if (scall == MAT_INITIAL_MATRIX) {
4471     if (size == 1) {
4472       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4473       PetscFunctionReturn(0);
4474     }
4475 
4476     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4477     ci[0] = 0;
4478     for (i=0; i<am; i++) {
4479       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4480     }
4481     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4482     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4483     k    = 0;
4484     for (i=0; i<am; i++) {
4485       ncols_o = bi[i+1] - bi[i];
4486       ncols_d = ai[i+1] - ai[i];
4487       /* off-diagonal portion of A */
4488       for (jo=0; jo<ncols_o; jo++) {
4489         col = cmap[*bj];
4490         if (col >= cstart) break;
4491         cj[k]   = col; bj++;
4492         ca[k++] = *ba++;
4493       }
4494       /* diagonal portion of A */
4495       for (j=0; j<ncols_d; j++) {
4496         cj[k]   = cstart + *aj++;
4497         ca[k++] = *aa++;
4498       }
4499       /* off-diagonal portion of A */
4500       for (j=jo; j<ncols_o; j++) {
4501         cj[k]   = cmap[*bj++];
4502         ca[k++] = *ba++;
4503       }
4504     }
4505     /* put together the new matrix */
4506     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4507     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4508     /* Since these are PETSc arrays, change flags to free them as necessary. */
4509     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4510     mat->free_a  = PETSC_TRUE;
4511     mat->free_ij = PETSC_TRUE;
4512     mat->nonew   = 0;
4513   } else if (scall == MAT_REUSE_MATRIX) {
4514     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4515     ci = mat->i; cj = mat->j; cam = mat->a;
4516     for (i=0; i<am; i++) {
4517       /* off-diagonal portion of A */
4518       ncols_o = bi[i+1] - bi[i];
4519       for (jo=0; jo<ncols_o; jo++) {
4520         col = cmap[*bj];
4521         if (col >= cstart) break;
4522         *cam++ = *ba++; bj++;
4523       }
4524       /* diagonal portion of A */
4525       ncols_d = ai[i+1] - ai[i];
4526       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4527       /* off-diagonal portion of A */
4528       for (j=jo; j<ncols_o; j++) {
4529         *cam++ = *ba++; bj++;
4530       }
4531     }
4532   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4533   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4534   PetscFunctionReturn(0);
4535 }
4536 
4537 #undef __FUNCT__
4538 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4539 /*@C
4540      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4541 
4542     Not Collective
4543 
4544    Input Parameters:
4545 +    A - the matrix
4546 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4547 -    row, col - index sets of rows and columns to extract (or NULL)
4548 
4549    Output Parameter:
4550 .    A_loc - the local sequential matrix generated
4551 
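   Example usage (a sketch; A is assumed to be an assembled MPIAIJ matrix; passing NULL for row and col
   selects all local rows and the default condensed set of columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
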
4552     Level: developer
4553 
4554 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4555 
4556 @*/
4557 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4558 {
4559   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4560   PetscErrorCode ierr;
4561   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4562   IS             isrowa,iscola;
4563   Mat            *aloc;
4564   PetscBool      match;
4565 
4566   PetscFunctionBegin;
4567   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4568   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4569   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4570   if (!row) {
4571     start = A->rmap->rstart; end = A->rmap->rend;
4572     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4573   } else {
4574     isrowa = *row;
4575   }
4576   if (!col) {
4577     start = A->cmap->rstart;
4578     cmap  = a->garray;
4579     nzA   = a->A->cmap->n;
4580     nzB   = a->B->cmap->n;
4581     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4582     ncols = 0;
4583     for (i=0; i<nzB; i++) {
4584       if (cmap[i] < start) idx[ncols++] = cmap[i];
4585       else break;
4586     }
4587     imark = i;
4588     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4589     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4590     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4591   } else {
4592     iscola = *col;
4593   }
4594   if (scall != MAT_INITIAL_MATRIX) {
4595     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4596     aloc[0] = *A_loc;
4597   }
4598   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4599   *A_loc = aloc[0];
4600   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4601   if (!row) {
4602     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4603   }
4604   if (!col) {
4605     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4606   }
4607   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4608   PetscFunctionReturn(0);
4609 }
4610 
4611 #undef __FUNCT__
4612 #define __FUNCT__ "MatGetBrowsOfAcols"
4613 /*@C
4614     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4615 
4616     Collective on Mat
4617 
4618    Input Parameters:
4619 +    A,B - the matrices in mpiaij format
4620 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4621 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4622 
4623    Output Parameter:
4624 +    rowb, colb - index sets of rows and columns of B to extract
4625 -    B_seq - the sequential matrix generated
4626 
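    Example usage (a sketch; A and B are assumed to be MPIAIJ matrices with compatible layouts):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* rowb, colb and B_seq can be passed back in with MAT_REUSE_MATRIX after the values of B change */
.ve
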
4627     Level: developer
4628 
4629 @*/
4630 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4631 {
4632   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4633   PetscErrorCode ierr;
4634   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4635   IS             isrowb,iscolb;
4636   Mat            *bseq=NULL;
4637 
4638   PetscFunctionBegin;
4639   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4640     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4641   }
4642   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4643 
4644   if (scall == MAT_INITIAL_MATRIX) {
4645     start = A->cmap->rstart;
4646     cmap  = a->garray;
4647     nzA   = a->A->cmap->n;
4648     nzB   = a->B->cmap->n;
4649     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4650     ncols = 0;
4651     for (i=0; i<nzB; i++) {  /* row < local row index */
4652       if (cmap[i] < start) idx[ncols++] = cmap[i];
4653       else break;
4654     }
4655     imark = i;
4656     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4657     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4658     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4659     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4660   } else {
4661     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4662     isrowb  = *rowb; iscolb = *colb;
4663     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4664     bseq[0] = *B_seq;
4665   }
4666   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4667   *B_seq = bseq[0];
4668   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4669   if (!rowb) {
4670     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4671   } else {
4672     *rowb = isrowb;
4673   }
4674   if (!colb) {
4675     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4676   } else {
4677     *colb = iscolb;
4678   }
4679   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4680   PetscFunctionReturn(0);
4681 }
4682 
4683 #undef __FUNCT__
4684 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4685 /*
4686     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4687     of the OFF-DIAGONAL portion of local A
4688 
4689     Collective on Mat
4690 
4691    Input Parameters:
4692 +    A,B - the matrices in mpiaij format
4693 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4694 
4695    Output Parameter:
4696 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4697 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4698 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4699 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4700 
4701     Level: developer
4702 
4703 */
4704 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4705 {
4706   VecScatter_MPI_General *gen_to,*gen_from;
4707   PetscErrorCode         ierr;
4708   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4709   Mat_SeqAIJ             *b_oth;
4710   VecScatter             ctx =a->Mvctx;
4711   MPI_Comm               comm;
4712   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4713   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4714   PetscScalar            *rvalues,*svalues;
4715   MatScalar              *b_otha,*bufa,*bufA;
4716   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4717   MPI_Request            *rwaits = NULL,*swaits = NULL;
4718   MPI_Status             *sstatus,rstatus;
4719   PetscMPIInt            jj,size;
4720   PetscInt               *cols,sbs,rbs;
4721   PetscScalar            *vals;
4722 
4723   PetscFunctionBegin;
4724   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4725   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4726 
4727   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4728     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4729   }
4730   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4731   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4732 
4733   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4734   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4735   rvalues  = gen_from->values; /* holds the length of receiving row */
4736   svalues  = gen_to->values;   /* holds the length of sending row */
4737   nrecvs   = gen_from->n;
4738   nsends   = gen_to->n;
4739 
4740   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4741   srow    = gen_to->indices;    /* local row index to be sent */
4742   sstarts = gen_to->starts;
4743   sprocs  = gen_to->procs;
4744   sstatus = gen_to->sstatus;
4745   sbs     = gen_to->bs;
4746   rstarts = gen_from->starts;
4747   rprocs  = gen_from->procs;
4748   rbs     = gen_from->bs;
4749 
4750   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4751   if (scall == MAT_INITIAL_MATRIX) {
4752     /* i-array */
4753     /*---------*/
4754     /*  post receives */
4755     for (i=0; i<nrecvs; i++) {
4756       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4757       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4758       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4759     }
4760 
4761     /* pack the outgoing message */
4762     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4763 
4764     sstartsj[0] = 0;
4765     rstartsj[0] = 0;
4766     len         = 0; /* total length of j or a array to be sent */
4767     k           = 0;
4768     for (i=0; i<nsends; i++) {
4769       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4770       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4771       for (j=0; j<nrows; j++) {
4772         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4773         for (l=0; l<sbs; l++) {
4774           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4775 
4776           rowlen[j*sbs+l] = ncols;
4777 
4778           len += ncols;
4779           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4780         }
4781         k++;
4782       }
4783       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4784 
4785       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4786     }
4787     /* recvs and sends of i-array are completed */
4788     i = nrecvs;
4789     while (i--) {
4790       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4791     }
4792     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4793 
4794     /* allocate buffers for sending j and a arrays */
4795     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4796     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4797 
4798     /* create i-array of B_oth */
4799     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4800 
4801     b_othi[0] = 0;
4802     len       = 0; /* total length of j or a array to be received */
4803     k         = 0;
4804     for (i=0; i<nrecvs; i++) {
4805       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4806       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4807       for (j=0; j<nrows; j++) {
4808         b_othi[k+1] = b_othi[k] + rowlen[j];
4809         len        += rowlen[j]; k++;
4810       }
4811       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4812     }
4813 
4814     /* allocate space for j and a arrays of B_oth */
4815     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4816     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4817 
4818     /* j-array */
4819     /*---------*/
4820     /*  post receives of j-array */
4821     for (i=0; i<nrecvs; i++) {
4822       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4823       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4824     }
4825 
4826     /* pack the outgoing message j-array */
4827     k = 0;
4828     for (i=0; i<nsends; i++) {
4829       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4830       bufJ  = bufj+sstartsj[i];
4831       for (j=0; j<nrows; j++) {
4832         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4833         for (ll=0; ll<sbs; ll++) {
4834           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4835           for (l=0; l<ncols; l++) {
4836             *bufJ++ = cols[l];
4837           }
4838           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4839         }
4840       }
4841       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4842     }
4843 
4844     /* recvs and sends of j-array are completed */
4845     i = nrecvs;
4846     while (i--) {
4847       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4848     }
4849     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4850   } else if (scall == MAT_REUSE_MATRIX) {
4851     sstartsj = *startsj_s;
4852     rstartsj = *startsj_r;
4853     bufa     = *bufa_ptr;
4854     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4855     b_otha   = b_oth->a;
4856   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4857 
4858   /* a-array */
4859   /*---------*/
4860   /*  post receives of a-array */
4861   for (i=0; i<nrecvs; i++) {
4862     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4863     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4864   }
4865 
4866   /* pack the outgoing message a-array */
4867   k = 0;
4868   for (i=0; i<nsends; i++) {
4869     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4870     bufA  = bufa+sstartsj[i];
4871     for (j=0; j<nrows; j++) {
4872       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4873       for (ll=0; ll<sbs; ll++) {
4874         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4875         for (l=0; l<ncols; l++) {
4876           *bufA++ = vals[l];
4877         }
4878         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4879       }
4880     }
4881     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4882   }
4883   /* recvs and sends of a-array are completed */
4884   i = nrecvs;
4885   while (i--) {
4886     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4887   }
4888   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4889   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4890 
4891   if (scall == MAT_INITIAL_MATRIX) {
4892     /* put together the new matrix */
4893     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4894 
4895     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4896     /* Since these are PETSc arrays, change flags to free them as necessary. */
4897     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4898     b_oth->free_a  = PETSC_TRUE;
4899     b_oth->free_ij = PETSC_TRUE;
4900     b_oth->nonew   = 0;
4901 
4902     ierr = PetscFree(bufj);CHKERRQ(ierr);
4903     if (!startsj_s || !bufa_ptr) {
4904       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4905       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4906     } else {
4907       *startsj_s = sstartsj;
4908       *startsj_r = rstartsj;
4909       *bufa_ptr  = bufa;
4910     }
4911   }
4912   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4913   PetscFunctionReturn(0);
4914 }
4915 
4916 #undef __FUNCT__
4917 #define __FUNCT__ "MatGetCommunicationStructs"
4918 /*@C
4919   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4920 
4921   Not Collective
4922 
4923   Input Parameters:
4924 . A - The matrix in mpiaij format
4925 
4926   Output Parameter:
4927 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4928 . colmap - A map from global column index to local index into lvec
4929 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4930 
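  Example usage (a sketch; A is assumed to be an assembled MPIAIJ matrix; all three output pointers must be valid):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
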
4931   Level: developer
4932 
4933 @*/
4934 #if defined(PETSC_USE_CTABLE)
4935 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4936 #else
4937 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4938 #endif
4939 {
4940   Mat_MPIAIJ *a;
4941 
4942   PetscFunctionBegin;
4943   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4944   PetscValidPointer(lvec, 2);
4945   PetscValidPointer(colmap, 3);
4946   PetscValidPointer(multScatter, 4);
4947   a = (Mat_MPIAIJ*) A->data;
4948   if (lvec) *lvec = a->lvec;
4949   if (colmap) *colmap = a->colmap;
4950   if (multScatter) *multScatter = a->Mvctx;
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4955 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4956 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4957 #if defined(PETSC_HAVE_ELEMENTAL)
4958 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4959 #endif
4960 
4961 #undef __FUNCT__
4962 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4963 /*
4964     Computes (B'*A')' since computing B*A directly is untenable
4965 
4966                n                       p                          p
4967         (              )       (              )         (                  )
4968       m (      A       )  *  n (       B      )   =   m (         C        )
4969         (              )       (              )         (                  )
4970 
4971 */
4972 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4973 {
4974   PetscErrorCode ierr;
4975   Mat            At,Bt,Ct;
4976 
4977   PetscFunctionBegin;
4978   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4979   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4980   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4981   ierr = MatDestroy(&At);CHKERRQ(ierr);
4982   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4983   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4984   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 #undef __FUNCT__
4989 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4990 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4991 {
4992   PetscErrorCode ierr;
4993   PetscInt       m=A->rmap->n,n=B->cmap->n;
4994   Mat            Cmat;
4995 
4996   PetscFunctionBegin;
4997   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4998   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4999   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5000   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5001   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5002   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5003   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5004   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5005 
5006   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5007 
5008   *C = Cmat;
5009   PetscFunctionReturn(0);
5010 }
5011 
5012 /* ----------------------------------------------------------------*/
5013 #undef __FUNCT__
5014 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5015 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5016 {
5017   PetscErrorCode ierr;
5018 
5019   PetscFunctionBegin;
5020   if (scall == MAT_INITIAL_MATRIX) {
5021     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5022     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5023     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5024   }
5025   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5026   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5027   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5028   PetscFunctionReturn(0);
5029 }
5030 
5031 /*MC
5032    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5033 
5034    Options Database Keys:
5035 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5036 
5037   Level: beginner
5038 
5039 .seealso: MatCreateAIJ()
5040 M*/
5041 
5042 #undef __FUNCT__
5043 #define __FUNCT__ "MatCreate_MPIAIJ"
5044 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5045 {
5046   Mat_MPIAIJ     *b;
5047   PetscErrorCode ierr;
5048   PetscMPIInt    size;
5049 
5050   PetscFunctionBegin;
5051   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5052 
5053   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5054   B->data       = (void*)b;
5055   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5056   B->assembled  = PETSC_FALSE;
5057   B->insertmode = NOT_SET_VALUES;
5058   b->size       = size;
5059 
5060   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5061 
5062   /* build cache for off array entries formed */
5063   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5064 
5065   b->donotstash  = PETSC_FALSE;
5066   b->colmap      = 0;
5067   b->garray      = 0;
5068   b->roworiented = PETSC_TRUE;
5069 
5070   /* stuff used for matrix vector multiply */
5071   b->lvec  = NULL;
5072   b->Mvctx = NULL;
5073 
5074   /* stuff for MatGetRow() */
5075   b->rowindices   = 0;
5076   b->rowvalues    = 0;
5077   b->getrowactive = PETSC_FALSE;
5078 
5079   /* flexible pointer used in CUSP/CUSPARSE classes */
5080   b->spptr = NULL;
5081 
5082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5092 #if defined(PETSC_HAVE_ELEMENTAL)
5093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5094 #endif
5095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5098   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5099   PetscFunctionReturn(0);
5100 }
5101 
5102 #undef __FUNCT__
5103 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5104 /*@C
5105      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5106          and "off-diagonal" part of the matrix in CSR format.
5107 
5108    Collective on MPI_Comm
5109 
5110    Input Parameters:
5111 +  comm - MPI communicator
5112 .  m - number of local rows (cannot be PETSC_DECIDE)
5113 .  n - number of local columns; this value should be the same as the local size used in creating the
5114        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
5115        For square matrices n is almost always m.
5116 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5117 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5118 .   i - row indices for "diagonal" portion of matrix (of length m+1, with i[0] = 0)
5119 .   j - column indices for "diagonal" portion of matrix
5120 .   a - matrix values for "diagonal" portion of matrix
5121 .   oi - row indices for "off-diagonal" portion of matrix (of length m+1, with oi[0] = 0)
5122 .   oj - column indices for "off-diagonal" portion of matrix
5123 -   oa - matrix values for "off-diagonal" portion of matrix
5124 
5125    Output Parameter:
5126 .   mat - the matrix
5127 
5128    Level: advanced
5129 
5130    Notes:
5131        The i, j, and a arrays (and likewise oi, oj, and oa) are NOT copied by this routine into the internal
5132        format used by PETSc. The user must not free the arrays until the matrix has been destroyed.
5133 
5134        The i and j indices (and likewise oi and oj) are 0-based.
5135 
5136        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
5137 
5138        This sets local rows and cannot be used to set off-processor values.
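
       For illustration, a minimal, hypothetical sketch for rank 0 of a two-process run in which each process
       owns two rows and two columns of a 4x4 matrix (rank 1 would pass its own arrays built the same way).
       The "diagonal" column indices j are local to the diagonal block, while the "off-diagonal" column indices
       oj are given as global indices, consistent with the global column count this routine uses for the
       off-diagonal block; the matrix should be destroyed before the arrays go out of scope:
.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       i[]  = {0,2,3},       j[]  = {0,1,1},  oi[] = {0,1,2},  oj[] = {2,3};
      PetscScalar    a[]  = {1.0,2.0,4.0}, oa[] = {3.0,5.0};

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve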
5139 
5140        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5141        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5142        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) instead,
5143        as sketched below: the resulting assembly is easier to implement, works with any matrix format, and does not
5144        require the user to keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE)
5145        to disable all communication if it is known that only local entries will be set.
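
       A minimal sketch of the recommended MatSetValues()-based assembly (the preallocation counts d_nz and o_nz
       and the per-row data ncols, cols, and vals are hypothetical placeholders for whatever the application
       computes; the MatSetOption() call is appropriate only because each process sets only its own rows):
.vb
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve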
5146 
5147 .keywords: matrix, aij, compressed row, sparse, parallel
5148 
5149 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5150           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5151 @*/
5152 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5153 {
5154   PetscErrorCode ierr;
5155   Mat_MPIAIJ     *maij;
5156 
5157   PetscFunctionBegin;
5158   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5159   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5160   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5161   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5162   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5163   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5164   maij = (Mat_MPIAIJ*) (*mat)->data;
5165 
5166   (*mat)->preallocated = PETSC_TRUE;
5167 
5168   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5169   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5170 
5171   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5172   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5173 
5174   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5175   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5176   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5177   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5178 
5179   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5180   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5181   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5182   PetscFunctionReturn(0);
5183 }
5184 
5185 /*
5186     Special version for direct calls from Fortran
5187 */
5188 #include <petsc-private/fortranimpl.h>
5189 
5190 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5191 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5192 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5193 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5194 #endif
5195 
5196 /* Change these macros so they can be used in a void function */
5197 #undef CHKERRQ
5198 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5199 #undef SETERRQ2
5200 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5201 #undef SETERRQ3
5202 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5203 #undef SETERRQ
5204 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5205 
5206 #undef __FUNCT__
5207 #define __FUNCT__ "matsetvaluesmpiaij_"
5208 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5209 {
5210   Mat            mat  = *mmat;
5211   PetscInt       m    = *mm, n = *mn;
5212   InsertMode     addv = *maddv;
5213   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5214   PetscScalar    value;
5215   PetscErrorCode ierr;
5216 
5217   MatCheckPreallocated(mat,1);
5218   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5219 
5220 #if defined(PETSC_USE_DEBUG)
5221   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5222 #endif
5223   {
5224     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5225     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5226     PetscBool roworiented = aij->roworiented;
5227 
5228     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5229     Mat        A                 = aij->A;
5230     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5231     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5232     MatScalar  *aa               = a->a;
5233     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5234     Mat        B                 = aij->B;
5235     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5236     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5237     MatScalar  *ba               = b->a;
5238 
5239     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5240     PetscInt  nonew = a->nonew;
5241     MatScalar *ap1,*ap2;
5242 
5243     PetscFunctionBegin;
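    /* Insert each value into the diagonal block A or the off-diagonal block B when the row is locally owned;
       otherwise stash it for the owning process (mirrors MatSetValues_MPIAIJ()) */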
5244     for (i=0; i<m; i++) {
5245       if (im[i] < 0) continue;
5246 #if defined(PETSC_USE_DEBUG)
5247       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5248 #endif
5249       if (im[i] >= rstart && im[i] < rend) {
5250         row      = im[i] - rstart;
5251         lastcol1 = -1;
5252         rp1      = aj + ai[row];
5253         ap1      = aa + ai[row];
5254         rmax1    = aimax[row];
5255         nrow1    = ailen[row];
5256         low1     = 0;
5257         high1    = nrow1;
5258         lastcol2 = -1;
5259         rp2      = bj + bi[row];
5260         ap2      = ba + bi[row];
5261         rmax2    = bimax[row];
5262         nrow2    = bilen[row];
5263         low2     = 0;
5264         high2    = nrow2;
5265 
5266         for (j=0; j<n; j++) {
5267           if (roworiented) value = v[i*n+j];
5268           else value = v[i+j*m];
5269           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5270           if (in[j] >= cstart && in[j] < cend) {
5271             col = in[j] - cstart;
5272             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5273           } else if (in[j] < 0) continue;
5274 #if defined(PETSC_USE_DEBUG)
5275           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5276 #endif
5277           else {
5278             if (mat->was_assembled) {
5279               if (!aij->colmap) {
5280                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5281               }
5282 #if defined(PETSC_USE_CTABLE)
5283               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5284               col--;
5285 #else
5286               col = aij->colmap[in[j]] - 1;
5287 #endif
5288               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5289                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5290                 col  =  in[j];
5291                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5292                 B     = aij->B;
5293                 b     = (Mat_SeqAIJ*)B->data;
5294                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5295                 ba    = b->a;  /* must be refreshed before computing ap2: MatDisAssemble_MPIAIJ() replaced aij->B */
5296                 rp2   = bj + bi[row];
5297                 ap2   = ba + bi[row];
5298                 rmax2 = bimax[row];
5299                 nrow2 = bilen[row];
5300                 low2  = 0;
5301                 high2 = nrow2;
5302                 bm    = aij->B->rmap->n;
5303               }
5304             } else col = in[j];
5305             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5306           }
5307         }
5308       } else if (!aij->donotstash) {
5309         if (roworiented) {
5310           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5311         } else {
5312           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5313         }
5314       }
5315     }
5316   }
5317   PetscFunctionReturnVoid();
5318 }
5319 
5320