xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5efd42a4d4e0ae8b540c20ac6de7da7f5376070a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically switches over to using inodes when
   enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN

    Collective on comm.  gmat is dereferenced only on rank 0 (all data is
    fanned out from there with point-to-point sends); m is this process's
    local row count.  With MAT_INITIAL_MATRIX the full structure and values
    are distributed and a new matrix returned in *inmat; with reuse, only the
    numerical values are re-sent into the existing *inmat, whose nonzero
    structure (and the cached ld[] left-of-diagonal counts) must match.
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds the global matrix, so only it can check the type */
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* block sizes are known only on rank 0; broadcast before applying */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* turn per-process row counts into ownership offsets (prefix sum) */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of the diagonal block; olens[i] all off-block entries */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 inserts its own slice directly from gmat's arrays */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      /* dlens currently holds total row lengths; subtract off-block part */
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* non-root ranks own their receive buffers; rank 0 aliased gmat's arrays */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* cache left-of-diagonal counts for the fast value-only reuse path */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* keep the base pointer: gmataa is advanced while copying below */
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row arrives as [left-of-diagonal B part | A part | right-of-diagonal B part],
       with ld[] giving the left-of-diagonal count per row */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of row i-1 plus left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* remaining right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it it is not scalable (each processor
402 has an order N integer array but is fast to acess.
403 */
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
/*
   Builds aij->colmap from aij->garray.  Both representations use a +1 shift:
   a stored value of local_col+1 is looked up (and later decremented), so that
   0 / "not found" unambiguously means the global column is not present in the
   off-diagonal block B.  Requires the matrix to have been assembled (garray set).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;  /* number of off-diagonal (compressed) columns */

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global column+1, valued local column+1 */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* non-scalable variant: dense length-N array, zero means "absent" */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
426 
/*
   Inserts or adds one value at (row,col) of the diagonal block A, used by
   MatSetValues_MPIAIJ().  (orow,ocol) are the global indices, used only in
   error messages.  Relies on many locals of the caller being in scope:
   rp1/ap1 (row's column-index and value arrays), low1/high1/lastcol1 (search
   window, cached across calls since columns usually arrive sorted), nrow1,
   rmax1, nonew, ignorezeroentries, and the macro-reallocation variables
   (A, a, am, aa, ai, aj, aimax, ailen).  Falls through a binary+linear search;
   if the entry is absent it shifts the tail of the row up and inserts,
   reallocating via MatSeqXAIJReallocateAIJ when the row is full.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites all stored values of one locally owned row.  row is the GLOBAL
   row index; v lists the row's values in global column order, which maps onto
   the storage as [left-of-diagonal part of B | all of A | right-of-diagonal
   part of B].  Assumes v has exactly as many entries as the row stores.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  /* diag = rstart; for a square matrix this is also the first diagonal-block column */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag; /* convert to local row index */
  /* l counts B entries whose global column precedes the diagonal block;
     if the loop completes without breaking, the whole B row is left of it */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
525 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds an m x n logically dense block of values with global
   indices im[]/in[].  Locally owned rows are split column-by-column between
   the diagonal block A and off-diagonal block B (via the two _Private
   macros); rows owned by other processes are stashed for communication
   during assembly.  Negative row/column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state for both macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            /* B uses compressed column numbering after assembly; translate via colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* col < 0: column not in B's current pattern; if new nonzeros are
               allowed, fall back to global numbering by disassembling B */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash for scatter during assembly (unless disallowed) */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
633 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m x n logically dense block of entries into v (row-major).
   Only locally owned rows may be requested; asking for an off-process row
   raises PETSC_ERR_SUP.  Columns are served from the diagonal block A or the
   off-diagonal block B; a column with no locally stored entry yields 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global -> compressed local via colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--; /* colmap stores local+1; 0 ("absent") becomes -1 */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale colmap entry that no
             longer maps back to the requested global column */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stashed off-process entries into the local
   blocks, assembles A and B, negotiates collective disassembly if any process
   needs it, and on the first final assembly sets up the parallel multiply
   data structures.  Collective on the matrix's communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every stash message; each carries (row,col,val) triples sorted by row */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff at least one process disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly only; NOTE(review): presumably builds B's compressed
       column map and the off-process vector scatter -- confirm in MatSetUpMultiply_MPIAIJ */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row-access workspace is invalidated by assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the given global rows, optionally placing 'diag' on the diagonal
   of each zeroed row, and (when x and b are given) setting b[row] = diag*x[row].

   rows[] may reference rows owned by other processes unless the matrix has
   MAT_NO_OFF_PROC_ZERO_ROWS set; a PetscSF reduction converts the global
   list into each process's list of local rows to zero.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 marks "not zeroed"; the reduction below overwrites flagged rows */
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      /* all rows are required to be local: store the local index directly */
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    /* the MPI_LOR reduction leaves >= 0 exactly in the flagged positions */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    /* square diagonal block: the sequential MatZeroRows can place the diagonal itself */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diagonal entries explicitly */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
859 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   Zeroes the given global rows AND the corresponding columns, optionally
   placing 'diag' on the diagonal and adjusting the right-hand side b using
   the known values x of the eliminated variables.

   The row list may reference rows owned by other processes; a PetscSF
   reduction converts it into each process's local row list.  The diagonal
   block is handled by MatZeroRowsColumns() on l->A; the off-diagonal block
   l->B is patched by hand, using a scattered mask vector to flag which
   ghost columns must be eliminated.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;   /* NOTE(review): PetscMPIInt assigned a PetscInt local size and later reused as a row length -- assumes local sizes fit in an int; confirm */
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 marks "not zeroed" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a global 0/1 mask flagging the zeroed rows, then scatter it so each
     process sees which of its ghost columns correspond to eliminated variables */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* fetch the off-process values of x needed for the b corrections below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed storage: only rows with nonzeros are stored; ridx maps back to true row indices */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* eliminated column: move its contribution to the rhs, then zero the entry */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
974 
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   yy = A*xx.  The scatter of the needed off-process entries of xx is
   overlapped with the multiply by the local diagonal block, so the
   statement order below is intentional.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* start communicating the ghost entries of xx into a->lvec */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* multiply by the local diagonal block while the scatter is in flight */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* add the off-diagonal contribution: yy += B*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/*
   zz = yy + A*xx.  Uses the same communication/computation overlap as
   MatMult_MPIAIJ: the forward scatter of xx's ghost entries runs while the
   local diagonal-block multiply-add executes.
*/
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz = yy + A_diag*xx while the scatter is in flight */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz += B*lvec (off-diagonal contribution) */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1019 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T * xx.  The off-diagonal block's transpose product is computed
   into a->lvec and then scattered in reverse (with ADD_VALUES) onto yy.
   The statement order depends on whether the scatter's send and receive
   are "merged" into the Begin call (see VecScatterGetMerged).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatter adds into yy during Begin, so the local part must be done first */
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1050 
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals the transpose of Amat (within tol), setting *f.

   Cheap test first: the local diagonal blocks must be mutual transposes.
   If that passes (and there is more than one rank) the off-diagonal parts
   are compared by extracting complementary submatrices with
   MatGetSubMatrices().
*/
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global column indices outside this rank's owned row range */
  /* NOTE(review): allocation is sized with N but the second fill loop below
     runs rows up to M; consistent only when M == N -- confirm this routine is
     never reached for rectangular matrices */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* A(Me,Notme) must be the transpose of B(Notme,Me) */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1092 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/*
   zz = yy + A^T * xx.  The reverse scatter of the off-diagonal transpose
   product is overlapped with the local transpose multiply-add.
*/
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by the MPIAIJ implementation: the stash, the
   cached diagonal, both sequential blocks, the colmap/garray column maps,
   the ghost vector and its scatter, the row-access work arrays, and the
   data struct itself; finally detaches all composed method hooks.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* composing NULL removes the type-specific function hooks installed at creation */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* th processor needs space a largest processor needs */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1386     if (isnull) PetscFunctionReturn(0);
1387   }
1388 
1389   {
1390     /* assemble the entire matrix onto first processor. */
1391     Mat        A;
1392     Mat_SeqAIJ *Aloc;
1393     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1394     MatScalar  *a;
1395 
1396     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1397     if (!rank) {
1398       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1399     } else {
1400       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1401     }
1402     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1403     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1404     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1405     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1406     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1407 
1408     /* copy over the A part */
1409     Aloc = (Mat_SeqAIJ*)aij->A->data;
1410     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1411     row  = mat->rmap->rstart;
1412     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1413     for (i=0; i<m; i++) {
1414       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1415       row++;
1416       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1417     }
1418     aj = Aloc->j;
1419     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1420 
1421     /* copy over the B part */
1422     Aloc = (Mat_SeqAIJ*)aij->B->data;
1423     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1424     row  = mat->rmap->rstart;
1425     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1426     ct   = cols;
1427     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1428     for (i=0; i<m; i++) {
1429       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1430       row++;
1431       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1432     }
1433     ierr = PetscFree(ct);CHKERRQ(ierr);
1434     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     /*
1437        Everyone has to call to draw the matrix since the graphics waits are
1438        synchronized across all processors that share the PetscDraw object
1439     */
1440     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1441     if (!rank) {
1442       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1443       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1444     }
1445     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1446     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1447     ierr = MatDestroy(&A);CHKERRQ(ierr);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatView_MPIAIJ"
1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1455 {
1456   PetscErrorCode ierr;
1457   PetscBool      iascii,isdraw,issocket,isbinary;
1458 
1459   PetscFunctionBegin;
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1463   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1464   if (iascii || isdraw || isbinary || issocket) {
1465     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1466   }
1467   PetscFunctionReturn(0);
1468 }
1469 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

   Only "local" SOR variants are supported: each process relaxes with its
   diagonal block mat->A while the coupling through the off-diagonal block
   mat->B is folded into a modified right-hand side bb1 = bb - B*x, using the
   ghost values of xx gathered into mat->lvec.  Eisenstat's trick is also
   supported; a request for true parallel SOR is rejected at the end.

   Input:  matin  - the matrix
           bb     - right-hand side
           omega  - relaxation factor
           flag   - MatSORType bit flags selecting the variant
           fshift - diagonal shift
           its    - number of outer sweeps
           lits   - number of local (inner) sweeps per outer sweep
   Output: xx     - solution iterate, updated in place
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* work vector for the modified rhs; allocated lazily below */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate to the sequential kernel of the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed when more than one outer sweep runs, when the initial guess is
     nonzero ("~flag & SOR_ZERO_INITIAL_GUESS" is nonzero exactly when the
     zero-initial-guess bit is NOT set), or for Eisenstat's trick */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the off-process contribution B*x vanishes,
         so the first sweep can use bb unmodified */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    /* same pattern as the symmetric case above, forward sweeps only */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    /* same pattern, backward sweeps only */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* first half of Eisenstat's trick: one local backward sweep from a zero guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* cache the diagonal on first use; reused by subsequent calls */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      /* fall back to an explicit pointwise product with the cached diagonal */
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any numerical error (e.g. zero pivot) detected by the local sweeps */
  matin->errortype = mat->A->errortype;
  PetscFunctionReturn(0);
}
1571 
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - form B, the permutation of A by the row permutation rowp
   and column permutation colp.

   Star forests (PetscSF) are used to invert the permutations in parallel:
   "rowsf" maps each locally owned row to its destination row, and a second SF
   does the same for local columns and then broadcasts the destinations of the
   ghost (garray) columns.  Exact preallocation counts are computed on the
   source side and communicated to the destination rows before filling the new
   matrix with MatSetValues().

   Assumes rowp and colp each describe a permutation (every global index
   appears exactly once across all processes).
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros per source row, then ship the counts
     to the destination rows (tdnnz/tonnz) for exact preallocation */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is initialized NULL and never reassigned in this
     function, so this destroy branch appears to be dead code -- confirm
     whether a parallel-colp gather step was removed or is still intended */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1680 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1681 {
1682   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1687   if (ghosts) *ghosts = aij->garray;
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1693 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1694 {
1695   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1696   Mat            A    = mat->A,B = mat->B;
1697   PetscErrorCode ierr;
1698   PetscReal      isend[5],irecv[5];
1699 
1700   PetscFunctionBegin;
1701   info->block_size = 1.0;
1702   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1703 
1704   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1705   isend[3] = info->memory;  isend[4] = info->mallocs;
1706 
1707   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1708 
1709   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1710   isend[3] += info->memory;  isend[4] += info->mallocs;
1711   if (flag == MAT_LOCAL) {
1712     info->nz_used      = isend[0];
1713     info->nz_allocated = isend[1];
1714     info->nz_unneeded  = isend[2];
1715     info->memory       = isend[3];
1716     info->mallocs      = isend[4];
1717   } else if (flag == MAT_GLOBAL_MAX) {
1718     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1719 
1720     info->nz_used      = irecv[0];
1721     info->nz_allocated = irecv[1];
1722     info->nz_unneeded  = irecv[2];
1723     info->memory       = irecv[3];
1724     info->mallocs      = irecv[4];
1725   } else if (flag == MAT_GLOBAL_SUM) {
1726     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1727 
1728     info->nz_used      = irecv[0];
1729     info->nz_allocated = irecv[1];
1730     info->nz_unneeded  = irecv[2];
1731     info->memory       = irecv[3];
1732     info->mallocs      = irecv[4];
1733   }
1734   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1735   info->fill_ratio_needed = 0;
1736   info->factor_mallocs    = 0;
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatSetOption_MPIAIJ"
1742 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1743 {
1744   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1745   PetscErrorCode ierr;
1746 
1747   PetscFunctionBegin;
1748   switch (op) {
1749   case MAT_NEW_NONZERO_LOCATIONS:
1750   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1751   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1752   case MAT_KEEP_NONZERO_PATTERN:
1753   case MAT_NEW_NONZERO_LOCATION_ERR:
1754   case MAT_USE_INODES:
1755   case MAT_IGNORE_ZERO_ENTRIES:
1756     MatCheckPreallocated(A,1);
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_ROW_ORIENTED:
1761     MatCheckPreallocated(A,1);
1762     a->roworiented = flg;
1763 
1764     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1765     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1766     break;
1767   case MAT_NEW_DIAGONALS:
1768     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1769     break;
1770   case MAT_IGNORE_OFF_PROC_ENTRIES:
1771     a->donotstash = flg;
1772     break;
1773   case MAT_SPD:
1774     A->spd_set = PETSC_TRUE;
1775     A->spd     = flg;
1776     if (flg) {
1777       A->symmetric                  = PETSC_TRUE;
1778       A->structurally_symmetric     = PETSC_TRUE;
1779       A->symmetric_set              = PETSC_TRUE;
1780       A->structurally_symmetric_set = PETSC_TRUE;
1781     }
1782     break;
1783   case MAT_SYMMETRIC:
1784     MatCheckPreallocated(A,1);
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   case MAT_STRUCTURALLY_SYMMETRIC:
1788     MatCheckPreallocated(A,1);
1789     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_HERMITIAN:
1792     MatCheckPreallocated(A,1);
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_SYMMETRY_ETERNAL:
1796     MatCheckPreallocated(A,1);
1797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1798     break;
1799   default:
1800     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1801   }
1802   PetscFunctionReturn(0);
1803 }
1804 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - return one locally owned row with global column indices,
   merging the diagonal (A) and off-diagonal (B) blocks so the columns come
   out in increasing global order.

   The returned idx/v arrays point into per-matrix scratch space
   (mat->rowindices / mat->rowvalues), allocated on first use and valid only
   until MatRestoreRow() is called; nested MatGetRow() calls are rejected via
   the getrowactive flag.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request from the sequential blocks only the parts the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates compressed B-block column indices to global indices */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal
         block; those go first, then all of A, then the remaining B entries */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already determined while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1887 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1888 {
1889   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1890 
1891   PetscFunctionBegin;
1892   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1893   aij->getrowactive = PETSC_FALSE;
1894   PetscFunctionReturn(0);
1895 }
1896 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - compute the Frobenius, 1- (max column sum) or infinity-
   (max row sum) norm of an MPIAIJ matrix.

   Each process accumulates over its diagonal (A) and off-diagonal (B) blocks
   and the partial results are combined with an Allreduce.  The 2-norm is not
   supported.  On a single process the work is delegated to the sequential
   block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds per-global-column absolute sums; garray maps the compressed
         B-block column indices to global columns */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local max followed by a
         global max reduction suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
1965 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - form the transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place reuse) the transpose is exactly
   preallocated: the diagonal-block counts come from a local column histogram,
   and the off-diagonal counts are accumulated onto the owning processes with
   a PetscSF reduction.  The values are then inserted with MatSetValues(),
   entering one column of B per local row of A.

   Note: Aloc->j is temporarily shifted to global column indices for the
   MatSetValues() calls and restored afterwards, so A is unchanged on return.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* the transpose has the row/column layouts of A swapped */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    /* reuse an existing transpose: pattern must already match */
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  /* each local row of A becomes one column of B; note the swapped (ncol,1) args */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  /* translate compressed B-block column indices to global via garray first */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: move B's guts into A and discard the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2059 {
2060   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2061   Mat            a    = aij->A,b = aij->B;
2062   PetscErrorCode ierr;
2063   PetscInt       s1,s2,s3;
2064 
2065   PetscFunctionBegin;
2066   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2067   if (rr) {
2068     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2069     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2070     /* Overlap communication with computation. */
2071     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072   }
2073   if (ll) {
2074     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2075     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2076     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2077   }
2078   /* scale  the diagonal block */
2079   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2080 
2081   if (rr) {
2082     /* Do a scatter end and then right scale the off-diagonal block */
2083     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2084     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2085   }
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2091 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2092 {
2093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatEqual_MPIAIJ"
2103 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2104 {
2105   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2106   Mat            a,b,c,d;
2107   PetscBool      flg;
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   a = matA->A; b = matA->B;
2112   c = matB->A; d = matB->B;
2113 
2114   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2115   if (flg) {
2116     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2117   }
2118   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatCopy_MPIAIJ"
2124 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2125 {
2126   PetscErrorCode ierr;
2127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2128   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2129 
2130   PetscFunctionBegin;
2131   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2132   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2133     /* because of the column compression in the off-processor part of the matrix a->B,
2134        the number of columns in a->B and b->B may be different, hence we cannot call
2135        the MatCopy() directly on the two parts. If need be, we can provide a more
2136        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2137        then copying the submatrices */
2138     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2139   } else {
2140     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2141     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2142   }
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatSetUp_MPIAIJ"
2148 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2149 {
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 /*
2158    Computes the number of nonzeros per row needed for preallocation when X and Y
2159    have different nonzero structure.
2160 */
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2163 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2164 {
2165   PetscInt       i,j,k,nzx,nzy;
2166 
2167   PetscFunctionBegin;
2168   /* Set the number of nonzeros in the new matrix */
2169   for (i=0; i<m; i++) {
2170     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2171     nzx = xi[i+1] - xi[i];
2172     nzy = yi[i+1] - yi[i];
2173     nnz[i] = 0;
2174     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2175       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2176       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2177       nnz[i]++;
2178     }
2179     for (; k<nzy; k++) nnz[i]++;
2180   }
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2187 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2188 {
2189   PetscErrorCode ierr;
2190   PetscInt       m = Y->rmap->N;
2191   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2192   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2193 
2194   PetscFunctionBegin;
2195   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2196   PetscFunctionReturn(0);
2197 }
2198 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   The work performed depends on how the nonzero patterns of X and Y relate:
   SAME_NONZERO_PATTERN   - the value arrays of the matching local blocks are
                            combined directly with BLAS axpy;
   SUBSET_NONZERO_PATTERN - handled by the generic MatAXPY_Basic();
   otherwise              - a new matrix preallocated for the union pattern is
                            built, filled, and swapped into Y in place via
                            MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all stored nonzeros */
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* off-diagonal blocks (same compressed pattern assumed for X and Y) */
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* values were modified without MatSetValues(): bump the object state */
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    /* build a fresh matrix preallocated for the union of the two patterns */
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* replace Y's internals with B's; B itself is consumed by this call */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2242 
2243 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatConjugate_MPIAIJ"
2247 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2248 {
2249 #if defined(PETSC_USE_COMPLEX)
2250   PetscErrorCode ierr;
2251   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2255   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2256 #else
2257   PetscFunctionBegin;
2258 #endif
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatRealPart_MPIAIJ"
2264 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2265 {
2266   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2267   PetscErrorCode ierr;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2271   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2277 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2278 {
2279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2284   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
/*
   MatGetRowMaxAbs_MPIAIJ - For each local row, returns the entry of largest
   absolute value in v and (optionally, when idx is non-NULL) its global
   column index in idx.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* per-row maximum over the diagonal block; indices are local to that block */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift local diagonal-block column indices into global numbering;
       rows whose maximum is 0 are left unshifted */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* per-row maximum over the off-diagonal block, into a scratch vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* merge: keep whichever block produced the larger magnitude */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      /* garray maps compressed B-block column indices to global indices */
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2330 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2331 {
2332   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2333   PetscErrorCode ierr;
2334   PetscInt       i,*idxb = 0;
2335   PetscScalar    *va,*vb;
2336   Vec            vtmp;
2337 
2338   PetscFunctionBegin;
2339   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2340   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2341   if (idx) {
2342     for (i=0; i<A->cmap->n; i++) {
2343       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2344     }
2345   }
2346 
2347   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2348   if (idx) {
2349     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2350   }
2351   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2352   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2353 
2354   for (i=0; i<A->rmap->n; i++) {
2355     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2356       va[i] = vb[i];
2357       if (idx) idx[i] = a->garray[idxb[i]];
2358     }
2359   }
2360 
2361   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2362   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2363   ierr = PetscFree(idxb);CHKERRQ(ierr);
2364   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2370 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2371 {
2372   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2373   PetscInt       n      = A->rmap->n;
2374   PetscInt       cstart = A->cmap->rstart;
2375   PetscInt       *cmap  = mat->garray;
2376   PetscInt       *diagIdx, *offdiagIdx;
2377   Vec            diagV, offdiagV;
2378   PetscScalar    *a, *diagA, *offdiagA;
2379   PetscInt       r;
2380   PetscErrorCode ierr;
2381 
2382   PetscFunctionBegin;
2383   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2385   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2386   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2387   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2388   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2389   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2390   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2391   for (r = 0; r < n; ++r) {
2392     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2393       a[r]   = diagA[r];
2394       idx[r] = cstart + diagIdx[r];
2395     } else {
2396       a[r]   = offdiagA[r];
2397       idx[r] = cmap[offdiagIdx[r]];
2398     }
2399   }
2400   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2401   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2402   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2403   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2404   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2405   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
/*
   MatGetRowMax_MPIAIJ - For each local row, returns the maximum entry in v
   and its global column index in idx.

   The maxima of the diagonal and off-diagonal blocks are computed separately
   and merged row by row.

   NOTE(review): idx is dereferenced unconditionally (unlike
   MatGetRowMaxAbs_MPIAIJ which guards it), so callers must pass a non-NULL
   idx; also the merge compares absolute values -- confirm that is the
   intended semantics for a signed row maximum.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* sequential work vectors holding the per-block row maxima */
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      /* diagonal-block index -> global column numbering */
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      /* garray maps compressed B-block column indices to global indices */
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2453 {
2454   PetscErrorCode ierr;
2455   Mat            *dummy;
2456 
2457   PetscFunctionBegin;
2458   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2459   *newmat = *dummy;
2460   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2466 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2467 {
2468   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2469   PetscErrorCode ierr;
2470 
2471   PetscFunctionBegin;
2472   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2473   A->errortype = a->A->errortype;
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2479 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2480 {
2481   PetscErrorCode ierr;
2482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2483 
2484   PetscFunctionBegin;
2485   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2486   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2487   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2488   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2495 {
2496   PetscFunctionBegin;
2497   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2498   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2504 /*@
2505    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2506 
2507    Collective on Mat
2508 
2509    Input Parameters:
2510 +    A - the matrix
2511 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2512 
2513  Level: advanced
2514 
2515 @*/
2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2517 {
2518   PetscErrorCode       ierr;
2519 
2520   PetscFunctionBegin;
2521   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
2524 
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJ"
/*
   MatSetFromOptions_MPIAIJ - Processes the MPIAIJ-specific runtime options.
   Currently only -mat_increase_overlap_scalable, which selects the scalable
   MatIncreaseOverlap() implementation.
*/
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode       ierr;
  PetscBool            sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* NOTE(review): PetscObjectOptionsBegin() opens a second options scope
     inside the caller-supplied one and its return value is not checked with
     CHKERRQ -- confirm this nesting is intentional */
  ierr = PetscObjectOptionsBegin((PetscObject)A);
    /* default reflects the currently installed implementation */
    if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
    ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2543 
#undef __FUNCT__
#define __FUNCT__ "MatShift_MPIAIJ"
/*
   MatShift_MPIAIJ - Computes Y = Y + a*I.

   Before delegating to MatShift_Basic(), ensures that diagonal entries can
   actually be inserted: an unpreallocated matrix gets a minimal one-nonzero-
   per-row preallocation, and an empty (but preallocated) diagonal block is
   re-preallocated while preserving its nonew flag.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* room for exactly the diagonal entry in each local row */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block holds no nonzeros: re-preallocate it, restoring the
       user's new-nonzero behavior flag afterwards since the preallocation
       call resets it */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2566 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2567 {
2568   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2569   PetscErrorCode ierr;
2570 
2571   PetscFunctionBegin;
2572   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2573   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2574   if (d) {
2575     PetscInt rstart;
2576     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2577     *d += rstart;
2578 
2579   }
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 
2584 /* -------------------------------------------------------------------*/
/* Function-pointer table installed on every MATMPIAIJ matrix.  The slot
   numbers in the interspersed comments refer to positions in struct _MatOps;
   a 0 entry means that operation has no MPIAIJ-specific implementation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
2731 
2732 /* ----------------------------------------------------------------------------------------*/
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2736 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2737 {
2738   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2739   PetscErrorCode ierr;
2740 
2741   PetscFunctionBegin;
2742   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2743   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2744   PetscFunctionReturn(0);
2745 }
2746 
2747 #undef __FUNCT__
2748 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2749 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2750 {
2751   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2752   PetscErrorCode ierr;
2753 
2754   PetscFunctionBegin;
2755   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2756   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2757   PetscFunctionReturn(0);
2758 }
2759 
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Preallocates storage for the two
   sequential blocks of an MPIAIJ matrix.

   Input Parameters:
+  B     - the matrix
.  d_nz  - nonzeros per row of the diagonal block (used when d_nnz is NULL)
.  d_nnz - optional per-row nonzero counts for the diagonal block
.  o_nz  - nonzeros per row of the off-diagonal block (used when o_nnz is NULL)
-  o_nnz - optional per-row nonzero counts for the off-diagonal block
*/
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the layouts must be finalized before local sizes are used below */
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    /* the off-diagonal block starts with the full global column width; its
       columns are compressed during assembly */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
2791 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new matrix with the same layout and
   structure as matin; cpvalues (passed through to MatDuplicate() on the
   local blocks) controls whether the numerical values are copied too.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the full operations table so any overrides on matin survive */
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* share the row/column layouts by reference */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate colmap, the global-to-local column map of the B block */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate garray, the global column indices of the B block's columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the communication machinery and the two local blocks */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2854 
2855 
2856 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format into an MPIAIJ matrix.

   Protocol: rank 0 reads the entire file (header, row lengths, column indices,
   numerical values) and ships each other process its contiguous block of rows
   with MPIULong_Send(); every other rank only posts the matching MPIULong_Recv()
   calls.  Rows are assigned to processes in contiguous blocks, rounded to the
   block size bs (settable via -matload_block_size, default 1).

   Collective on the communicator of the viewer.

   Input Parameters:
+  newMat - the matrix to load into (type/sizes may be preset by the caller)
-  viewer - a binary viewer positioned at the start of a matrix object
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* header = [classid, M, N, total-nz]; a negative nonzero count flags a special on-disk format */
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* only rank 0 read the header; broadcast M, N, and the nonzero count */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  /* rowners[i+1] first holds the local row count of rank i, then is turned into a prefix sum below */
  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  /* convert counts to ownership ranges: rowners[i] = first global row of rank i */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    /* rank 0 reads its own lengths, then streams each other rank's lengths out */
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everybody else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    /* MPI_Scan gives the inclusive prefix sum, i.e. one past my last owned column */
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrix: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* temporarily make ourlens[] the diagonal-block counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens[] to the full per-row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3060 
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3063 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3064 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3065 {
3066   PetscErrorCode ierr;
3067   IS             iscol_local;
3068   PetscInt       csize;
3069 
3070   PetscFunctionBegin;
3071   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3072   if (call == MAT_REUSE_MATRIX) {
3073     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3074     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3075   } else {
3076     /* check if we are grabbing all columns*/
3077     PetscBool    isstride;
3078     PetscMPIInt  lisstride = 0,gisstride;
3079     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3080     if (isstride) {
3081       PetscInt  start,len,mstart,mlen;
3082       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3083       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3084       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3085       if (mstart == start && mlen-mstart == len) lisstride = 1;
3086     }
3087     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3088     if (gisstride) {
3089       PetscInt N;
3090       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3091       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3092       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3093       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3094     } else {
3095       PetscInt cbs;
3096       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3097       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3098       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3099     }
3100   }
3101   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3102   if (call == MAT_INITIAL_MATRIX) {
3103     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3104     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3105   }
3106   PetscFunctionReturn(0);
3107 }
3108 
extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.

  Steps:
    1. extract this process's rows (all requested columns) into a sequential
       matrix Mreuse via MatGetSubMatrices_MPIAIJ_Local();
    2. on MAT_INITIAL_MATRIX, create the parallel result M with exact
       diagonal/off-diagonal preallocation derived from Mreuse's CSR arrays;
       on MAT_REUSE_MATRIX, zero the previously created M;
    3. re-insert Mreuse's rows into M with MatSetValues_MPIAIJ() and assemble.

  csize is the requested local column size of the result (PETSC_DECIDE allowed);
  Mreuse is cached on M under the key "SubMatrix" for later reuse calls.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* an identity IS covering every global column enables the all-columns fast path */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call ==  MAT_REUSE_MATRIX) {
    /* retrieve the cached sequential submatrix from the previous initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* peek directly at the sequential submatrix's CSR arrays */
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of local column sizes gives my column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* walk Mreuse's CSR arrays row by row and insert into the parallel matrix */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3246 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3247 {
3248   PetscInt       m,cstart, cend,j,nnz,i,d;
3249   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3250   const PetscInt *JJ;
3251   PetscScalar    *values;
3252   PetscErrorCode ierr;
3253 
3254   PetscFunctionBegin;
3255   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3256 
3257   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3258   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3259   m      = B->rmap->n;
3260   cstart = B->cmap->rstart;
3261   cend   = B->cmap->rend;
3262   rstart = B->rmap->rstart;
3263 
3264   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3265 
3266 #if defined(PETSC_USE_DEBUGGING)
3267   for (i=0; i<m; i++) {
3268     nnz = Ii[i+1]- Ii[i];
3269     JJ  = J + Ii[i];
3270     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3271     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3272     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3273   }
3274 #endif
3275 
3276   for (i=0; i<m; i++) {
3277     nnz     = Ii[i+1]- Ii[i];
3278     JJ      = J + Ii[i];
3279     nnz_max = PetscMax(nnz_max,nnz);
3280     d       = 0;
3281     for (j=0; j<nnz; j++) {
3282       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3283     }
3284     d_nnz[i] = d;
3285     o_nnz[i] = nnz - d;
3286   }
3287   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3288   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3289 
3290   if (v) values = (PetscScalar*)v;
3291   else {
3292     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3293   }
3294 
3295   for (i=0; i<m; i++) {
3296     ii   = i + rstart;
3297     nnz  = Ii[i+1]- Ii[i];
3298     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3299   }
3300   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302 
3303   if (!v) {
3304     ierr = PetscFree(values);CHKERRQ(ierr);
3305   }
3306   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3307   PetscFunctionReturn(0);
3308 }
3309 
3310 #undef __FUNCT__
3311 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3312 /*@
3313    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3314    (the default parallel PETSc format).
3315 
3316    Collective on MPI_Comm
3317 
3318    Input Parameters:
3319 +  B - the matrix
3320 .  i - the indices into j for the start of each local row (starts with zero)
3321 .  j - the column indices for each local row (starts with zero)
3322 -  v - optional values in the matrix
3323 
3324    Level: developer
3325 
3326    Notes:
3327        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3328      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3329      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3330 
3331        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3332 
3333        The format which is used for the sparse matrix input, is equivalent to a
3334     row-major ordering.. i.e for the following matrix, the input data expected is
3335     as shown
3336 
3337 $        1 0 0
3338 $        2 0 3     P0
3339 $       -------
3340 $        4 5 6     P1
3341 $
3342 $     Process0 [P0]: rows_owned=[0,1]
3343 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3344 $        j =  {0,0,2}  [size = 3]
3345 $        v =  {1,2,3}  [size = 3]
3346 $
3347 $     Process1 [P1]: rows_owned=[2]
3348 $        i =  {0,3}    [size = nrow+1  = 1+1]
3349 $        j =  {0,1,2}  [size = 3]
3350 $        v =  {4,5,6}  [size = 3]
3351 
3352 .keywords: matrix, aij, compressed row, sparse, parallel
3353 
3354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3355           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3356 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation registered as "MatMPIAIJSetPreallocationCSR_C"
     (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); PetscTryMethod is a no-op when
     the matrix type provides no such method. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3368 /*@C
3369    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3370    (the default parallel PETSc format).  For good matrix assembly performance
3371    the user should preallocate the matrix storage by setting the parameters
3372    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3373    performance can be increased by more than a factor of 50.
3374 
3375    Collective on MPI_Comm
3376 
3377    Input Parameters:
3378 +  B - the matrix
3379 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3380            (same value is used for all local rows)
3381 .  d_nnz - array containing the number of nonzeros in the various rows of the
3382            DIAGONAL portion of the local submatrix (possibly different for each row)
3383            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3384            The size of this array is equal to the number of local rows, i.e 'm'.
3385            For matrices that will be factored, you must leave room for (and set)
3386            the diagonal entry even if it is zero.
3387 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3388            submatrix (same value is used for all local rows).
3389 -  o_nnz - array containing the number of nonzeros in the various rows of the
3390            OFF-DIAGONAL portion of the local submatrix (possibly different for
3391            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3392            structure. The size of this array is equal to the number
3393            of local rows, i.e 'm'.
3394 
3395    If the *_nnz parameter is given then the *_nz parameter is ignored
3396 
3397    The AIJ format (also called the Yale sparse matrix format or
3398    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3399    storage.  The stored row and column indices begin with zero.
3400    See Users-Manual: ch_mat for details.
3401 
3402    The parallel matrix is partitioned such that the first m0 rows belong to
3403    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3404    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3405 
3406    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
3408    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3409    first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
3411    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3412    common case of a square matrix, the row and column ranges are the same and
3413    the DIAGONAL part is also square. The remaining portion of the local
3414    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3415 
3416    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3417 
3418    You can call MatGetInfo() to get information on how effective the preallocation was;
3419    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3420    You can also run with the option -info and look for messages with the string
3421    malloc in them to see if additional memory allocation was needed.
3422 
3423    Example usage:
3424 
3425    Consider the following 8x8 matrix with 34 non-zero values, that is
3426    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3427    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3428    as follows:
3429 
3430 .vb
3431             1  2  0  |  0  3  0  |  0  4
3432     Proc0   0  5  6  |  7  0  0  |  8  0
3433             9  0 10  | 11  0  0  | 12  0
3434     -------------------------------------
3435            13  0 14  | 15 16 17  |  0  0
3436     Proc1   0 18  0  | 19 20 21  |  0  0
3437             0  0  0  | 22 23  0  | 24  0
3438     -------------------------------------
3439     Proc2  25 26 27  |  0  0 28  | 29  0
3440            30  0  0  | 31 32 33  |  0 34
3441 .ve
3442 
3443    This can be represented as a collection of submatrices as:
3444 
3445 .vb
3446       A B C
3447       D E F
3448       G H I
3449 .ve
3450 
3451    Where the submatrices A,B,C are owned by proc0, D,E,F are
3452    owned by proc1, G,H,I are owned by proc2.
3453 
3454    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3455    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3456    The 'M','N' parameters are 8,8, and have the same values on all procs.
3457 
3458    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3459    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3460    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3461    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3462    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3464 
3465    When d_nz, o_nz parameters are specified, d_nz storage elements are
3466    allocated for every row of the local diagonal submatrix, and o_nz
3467    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3469    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3470    In this case, the values of d_nz,o_nz are:
3471 .vb
3472      proc0 : dnz = 2, o_nz = 2
3473      proc1 : dnz = 3, o_nz = 2
3474      proc2 : dnz = 1, o_nz = 4
3475 .ve
3476    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3477    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
3479    34 values.
3480 
3481    When d_nnz, o_nnz parameters are specified, the storage is specified
3482    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3483    In the above case the values for d_nnz,o_nnz are:
3484 .vb
3485      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3486      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3487      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3488 .ve
3489    Here the space allocated is sum of all the above values i.e 34, and
3490    hence pre-allocation is perfect.
3491 
3492    Level: intermediate
3493 
3494 .keywords: matrix, aij, compressed row, sparse, parallel
3495 
3496 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3497           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3498 @*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the implementation registered as "MatMPIAIJSetPreallocation_C";
     PetscTryMethod silently succeeds for matrix types without such a method,
     which lets this be called safely on both MPIAIJ and non-MPIAIJ matrices. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3509 
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3512 /*@
3513      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3514          CSR format the local rows.
3515 
3516    Collective on MPI_Comm
3517 
3518    Input Parameters:
3519 +  comm - MPI communicator
3520 .  m - number of local rows (Cannot be PETSC_DECIDE)
3521 .  n - This value should be the same as the local size used in creating the
3522        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3523        calculated if N is given) For square matrices n is almost always m.
3524 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3525 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3526 .   i - row indices
3527 .   j - column indices
3528 -   a - matrix values
3529 
3530    Output Parameter:
3531 .   mat - the matrix
3532 
3533    Level: intermediate
3534 
3535    Notes:
3536        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3537      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3538      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3539 
3540        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3541 
3542        The format which is used for the sparse matrix input, is equivalent to a
3543     row-major ordering.. i.e for the following matrix, the input data expected is
3544     as shown
3545 
3546 $        1 0 0
3547 $        2 0 3     P0
3548 $       -------
3549 $        4 5 6     P1
3550 $
3551 $     Process0 [P0]: rows_owned=[0,1]
3552 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3553 $        j =  {0,0,2}  [size = 3]
3554 $        v =  {1,2,3}  [size = 3]
3555 $
3556 $     Process1 [P1]: rows_owned=[2]
3557 $        i =  {0,3}    [size = nrow+1  = 1+1]
3558 $        j =  {0,1,2}  [size = 3]
3559 $        v =  {4,5,6}  [size = 3]
3560 
3561 .keywords: matrix, aij, compressed row, sparse, parallel
3562 
3563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3564           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3565 @*/
PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate the CSR input before creating anything */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  /* NOTE(review): block sizes are intentionally not set here (dead line above);
     this routine has no bs/cbs parameters to forward — confirm before removing */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the CSR arrays into PETSc's internal storage and assembles the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3580 
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatCreateAIJ"
3583 /*@C
3584    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3585    (the default parallel PETSc format).  For good matrix assembly performance
3586    the user should preallocate the matrix storage by setting the parameters
3587    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3588    performance can be increased by more than a factor of 50.
3589 
3590    Collective on MPI_Comm
3591 
3592    Input Parameters:
3593 +  comm - MPI communicator
3594 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3595            This value should be the same as the local size used in creating the
3596            y vector for the matrix-vector product y = Ax.
3597 .  n - This value should be the same as the local size used in creating the
3598        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3599        calculated if N is given) For square matrices n is almost always m.
3600 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3601 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3602 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3603            (same value is used for all local rows)
3604 .  d_nnz - array containing the number of nonzeros in the various rows of the
3605            DIAGONAL portion of the local submatrix (possibly different for each row)
3606            or NULL, if d_nz is used to specify the nonzero structure.
3607            The size of this array is equal to the number of local rows, i.e 'm'.
3608 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3609            submatrix (same value is used for all local rows).
3610 -  o_nnz - array containing the number of nonzeros in the various rows of the
3611            OFF-DIAGONAL portion of the local submatrix (possibly different for
3612            each row) or NULL, if o_nz is used to specify the nonzero
3613            structure. The size of this array is equal to the number
3614            of local rows, i.e 'm'.
3615 
3616    Output Parameter:
3617 .  A - the matrix
3618 
3619    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3621    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3622 
3623    Notes:
3624    If the *_nnz parameter is given then the *_nz parameter is ignored
3625 
3626    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3627    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3628    storage requirements for this matrix.
3629 
3630    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
3632    that argument.
3633 
3634    The user MUST specify either the local or global matrix dimensions
3635    (possibly both).
3636 
3637    The parallel matrix is partitioned across processors such that the
3638    first m0 rows belong to process 0, the next m1 rows belong to
3639    process 1, the next m2 rows belong to process 2 etc.. where
3640    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3641    values corresponding to [m x N] submatrix.
3642 
3643    The columns are logically partitioned with the n0 columns belonging
3644    to 0th partition, the next n1 columns belonging to the next
3645    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3646 
3647    The DIAGONAL portion of the local submatrix on any given processor
3648    is the submatrix corresponding to the rows and columns m,n
3649    corresponding to the given processor. i.e diagonal matrix on
3650    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3651    etc. The remaining portion of the local submatrix [m x (N-n)]
3652    constitute the OFF-DIAGONAL portion. The example below better
3653    illustrates this concept.
3654 
3655    For a square global matrix we define each processor's diagonal portion
3656    to be its local rows and the corresponding columns (a square submatrix);
3657    each processor's off-diagonal portion encompasses the remainder of the
3658    local matrix (a rectangular submatrix).
3659 
3660    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3661 
3662    When calling this routine with a single process communicator, a matrix of
3663    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3664    type of communicator, use the construction mechanism
3665 .vb
3666      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3667 .ve
3668 
3669    By default, this format uses inodes (identical nodes) when possible.
3670    We search for consecutive rows with the same nonzero structure, thereby
3671    reusing matrix information to achieve increased efficiency.
3672 
3673    Options Database Keys:
3674 +  -mat_no_inode  - Do not use inodes
3675 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3676 -  -mat_aij_oneindex - Internally use indexing starting at 1
3677         rather than 0.  Note that when calling MatSetValues(),
3678         the user still MUST index entries starting at 0!
3679 
3680 
3681    Example usage:
3682 
3683    Consider the following 8x8 matrix with 34 non-zero values, that is
3684    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3685    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3686    as follows
3687 
3688 .vb
3689             1  2  0  |  0  3  0  |  0  4
3690     Proc0   0  5  6  |  7  0  0  |  8  0
3691             9  0 10  | 11  0  0  | 12  0
3692     -------------------------------------
3693            13  0 14  | 15 16 17  |  0  0
3694     Proc1   0 18  0  | 19 20 21  |  0  0
3695             0  0  0  | 22 23  0  | 24  0
3696     -------------------------------------
3697     Proc2  25 26 27  |  0  0 28  | 29  0
3698            30  0  0  | 31 32 33  |  0 34
3699 .ve
3700 
3701    This can be represented as a collection of submatrices as
3702 
3703 .vb
3704       A B C
3705       D E F
3706       G H I
3707 .ve
3708 
3709    Where the submatrices A,B,C are owned by proc0, D,E,F are
3710    owned by proc1, G,H,I are owned by proc2.
3711 
3712    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3713    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3714    The 'M','N' parameters are 8,8, and have the same values on all procs.
3715 
3716    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3717    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3718    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3719    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3722 
3723    When d_nz, o_nz parameters are specified, d_nz storage elements are
3724    allocated for every row of the local diagonal submatrix, and o_nz
3725    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3727    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3728    In this case, the values of d_nz,o_nz are
3729 .vb
3730      proc0 : dnz = 2, o_nz = 2
3731      proc1 : dnz = 3, o_nz = 2
3732      proc2 : dnz = 1, o_nz = 4
3733 .ve
3734    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
3737    34 values.
3738 
3739    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3741    In the above case the values for d_nnz,o_nnz are
3742 .vb
3743      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3744      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3745      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3746 .ve
3747    Here the space allocated is sum of all the above values i.e 34, and
3748    hence pre-allocation is perfect.
3749 
3750    Level: intermediate
3751 
3752 .keywords: matrix, aij, compressed row, sparse, parallel
3753 
3754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3755           MPIAIJ, MatCreateMPIAIJWithArrays()
3756 @*/
3757 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3758 {
3759   PetscErrorCode ierr;
3760   PetscMPIInt    size;
3761 
3762   PetscFunctionBegin;
3763   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3764   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3765   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3766   if (size > 1) {
3767     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3768     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3769   } else {
3770     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3771     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3772   }
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 #undef __FUNCT__
3777 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3778 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3779 {
3780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3781   PetscBool      flg;
3782   PetscErrorCode ierr;
3783 
3784   PetscFunctionBegin;
3785   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3786   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
3787   if (Ad)     *Ad     = a->A;
3788   if (Ao)     *Ao     = a->B;
3789   if (colmap) *colmap = a->garray;
3790   PetscFunctionReturn(0);
3791 }
3792 
3793 #undef __FUNCT__
3794 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3795 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3796 {
3797   PetscErrorCode ierr;
3798   PetscInt       i;
3799   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3800 
3801   PetscFunctionBegin;
3802   if (coloring->ctype == IS_COLORING_GLOBAL) {
3803     ISColoringValue *allcolors,*colors;
3804     ISColoring      ocoloring;
3805 
3806     /* set coloring for diagonal portion */
3807     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3808 
3809     /* set coloring for off-diagonal portion */
3810     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3811     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3812     for (i=0; i<a->B->cmap->n; i++) {
3813       colors[i] = allcolors[a->garray[i]];
3814     }
3815     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3816     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3817     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3818     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3819   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3820     ISColoringValue *colors;
3821     PetscInt        *larray;
3822     ISColoring      ocoloring;
3823 
3824     /* set coloring for diagonal portion */
3825     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3826     for (i=0; i<a->A->cmap->n; i++) {
3827       larray[i] = i + A->cmap->rstart;
3828     }
3829     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3830     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3831     for (i=0; i<a->A->cmap->n; i++) {
3832       colors[i] = coloring->colors[larray[i]];
3833     }
3834     ierr = PetscFree(larray);CHKERRQ(ierr);
3835     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3836     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3837     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3838 
3839     /* set coloring for off-diagonal portion */
3840     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3841     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3842     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3843     for (i=0; i<a->B->cmap->n; i++) {
3844       colors[i] = coloring->colors[larray[i]];
3845     }
3846     ierr = PetscFree(larray);CHKERRQ(ierr);
3847     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3848     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3849     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3850   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3856 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3857 {
3858   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3859   PetscErrorCode ierr;
3860 
3861   PetscFunctionBegin;
3862   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3863   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3864   PetscFunctionReturn(0);
3865 }
3866 
3867 #undef __FUNCT__
3868 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
/* Builds (MAT_INITIAL_MATRIX) or refills (reuse) a parallel MPIAIJ matrix whose
   rows are the concatenation, in rank order, of the rows of the sequential
   matrix 'inmat' supplied by each process of 'comm'.  n is the local column
   count or PETSC_DECIDE. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

    /* exclusive prefix sum of the local row counts gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for exact preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: copy each local row of inmat to its global position in *outmat */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3919 
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatFileSplit"
3922 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3923 {
3924   PetscErrorCode    ierr;
3925   PetscMPIInt       rank;
3926   PetscInt          m,N,i,rstart,nnz;
3927   size_t            len;
3928   const PetscInt    *indx;
3929   PetscViewer       out;
3930   char              *name;
3931   Mat               B;
3932   const PetscScalar *values;
3933 
3934   PetscFunctionBegin;
3935   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3936   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3937   /* Should this be the type of the diagonal block of A? */
3938   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3939   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3940   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3941   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3942   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3943   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3944   for (i=0; i<m; i++) {
3945     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3946     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3947     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3948   }
3949   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3950   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3951 
3952   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3953   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3954   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3955   sprintf(name,"%s.%d",outfile,rank);
3956   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3957   ierr = PetscFree(name);CHKERRQ(ierr);
3958   ierr = MatView(B,out);CHKERRQ(ierr);
3959   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3960   ierr = MatDestroy(&B);CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963 
3964 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3965 #undef __FUNCT__
3966 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/* Destroy routine for MPIAIJ matrices that were produced by the seqs-to-MPI
   merge: releases the Mat_Merge_SeqsToMPI bookkeeping stashed on the object
   (if present), then falls through to the regular MPIAIJ destroy. */
PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    /* buf_ri/buf_rj are arrays of pointers whose data is presumably one
       allocation anchored at index 0 — hence the [0] free before the array free */
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the container so the base destroy does not see a dangling pointer */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3996 
3997 #include <../src/mat/utils/freespace.h>
3998 #include <petscbt.h>
3999 
4000 #undef __FUNCT__
4001 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/* Numeric phase of merging per-process SeqAIJ matrices into one MPIAIJ matrix:
   reuses the communication/merge structure that the symbolic phase stashed on
   'mpimat' (container "MatMergeSeqsToMPI"), exchanges the numerical values of
   off-process rows, and sums local plus received values into mpimat. */
PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge bookkeeping left behind by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values destined for [proc] are contiguous in aa starting at row owners[proc] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    /* NOTE(review): ba_i is MatScalar while the memzero size uses sizeof(PetscScalar);
       these agree unless PETSc is configured with mixed precision — confirm */
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge by walking bj_i (superset, sorted) and aj in lockstep */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4118 
4119 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4123 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4124 {
4125   PetscErrorCode      ierr;
4126   Mat                 B_mpi;
4127   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4128   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4129   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4130   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4131   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4132   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4133   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4134   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4135   MPI_Status          *status;
4136   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4137   PetscBT             lnkbt;
4138   Mat_Merge_SeqsToMPI *merge;
4139   PetscContainer      container;
4140 
4141   PetscFunctionBegin;
4142   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4143 
4144   /* make sure it is a PETSc comm */
4145   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4146   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4147   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4148 
4149   ierr = PetscNew(&merge);CHKERRQ(ierr);
4150   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4151 
4152   /* determine row ownership */
4153   /*---------------------------------------------------------*/
4154   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4155   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4156   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4157   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4158   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4159   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4160   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4161 
4162   m      = merge->rowmap->n;
4163   owners = merge->rowmap->range;
4164 
4165   /* determine the number of messages to send, their lengths */
4166   /*---------------------------------------------------------*/
4167   len_s = merge->len_s;
4168 
4169   len          = 0; /* length of buf_si[] */
4170   merge->nsend = 0;
4171   for (proc=0; proc<size; proc++) {
4172     len_si[proc] = 0;
4173     if (proc == rank) {
4174       len_s[proc] = 0;
4175     } else {
4176       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4177       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4178     }
4179     if (len_s[proc]) {
4180       merge->nsend++;
4181       nrows = 0;
4182       for (i=owners[proc]; i<owners[proc+1]; i++) {
4183         if (ai[i+1] > ai[i]) nrows++;
4184       }
4185       len_si[proc] = 2*(nrows+1);
4186       len         += len_si[proc];
4187     }
4188   }
4189 
4190   /* determine the number and length of messages to receive for ij-structure */
4191   /*-------------------------------------------------------------------------*/
4192   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4193   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4194 
4195   /* post the Irecv of j-structure */
4196   /*-------------------------------*/
4197   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4198   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4199 
4200   /* post the Isend of j-structure */
4201   /*--------------------------------*/
4202   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4203 
4204   for (proc=0, k=0; proc<size; proc++) {
4205     if (!len_s[proc]) continue;
4206     i    = owners[proc];
4207     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4208     k++;
4209   }
4210 
4211   /* receives and sends of j-structure are complete */
4212   /*------------------------------------------------*/
4213   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4214   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4215 
4216   /* send and recv i-structure */
4217   /*---------------------------*/
4218   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4219   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4220 
4221   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4222   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4223   for (proc=0,k=0; proc<size; proc++) {
4224     if (!len_s[proc]) continue;
4225     /* form outgoing message for i-structure:
4226          buf_si[0]:                 nrows to be sent
4227                [1:nrows]:           row index (global)
4228                [nrows+1:2*nrows+1]: i-structure index
4229     */
4230     /*-------------------------------------------*/
4231     nrows       = len_si[proc]/2 - 1;
4232     buf_si_i    = buf_si + nrows+1;
4233     buf_si[0]   = nrows;
4234     buf_si_i[0] = 0;
4235     nrows       = 0;
4236     for (i=owners[proc]; i<owners[proc+1]; i++) {
4237       anzi = ai[i+1] - ai[i];
4238       if (anzi) {
4239         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4240         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4241         nrows++;
4242       }
4243     }
4244     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4245     k++;
4246     buf_si += len_si[proc];
4247   }
4248 
4249   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4250   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4251 
4252   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4253   for (i=0; i<merge->nrecv; i++) {
4254     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4255   }
4256 
4257   ierr = PetscFree(len_si);CHKERRQ(ierr);
4258   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4259   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4260   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4261   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4262   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4263   ierr = PetscFree(status);CHKERRQ(ierr);
4264 
4265   /* compute a local seq matrix in each processor */
4266   /*----------------------------------------------*/
4267   /* allocate bi array and free space for accumulating nonzero column info */
4268   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4269   bi[0] = 0;
4270 
4271   /* create and initialize a linked list */
4272   nlnk = N+1;
4273   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4274 
4275   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4276   len  = ai[owners[rank+1]] - ai[owners[rank]];
4277   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4278 
4279   current_space = free_space;
4280 
4281   /* determine symbolic info for each local row */
4282   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4283 
4284   for (k=0; k<merge->nrecv; k++) {
4285     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4286     nrows       = *buf_ri_k[k];
4287     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4288     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4289   }
4290 
4291   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4292   len  = 0;
4293   for (i=0; i<m; i++) {
4294     bnzi = 0;
4295     /* add local non-zero cols of this proc's seqmat into lnk */
4296     arow  = owners[rank] + i;
4297     anzi  = ai[arow+1] - ai[arow];
4298     aj    = a->j + ai[arow];
4299     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4300     bnzi += nlnk;
4301     /* add received col data into lnk */
4302     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4303       if (i == *nextrow[k]) { /* i-th row */
4304         anzi  = *(nextai[k]+1) - *nextai[k];
4305         aj    = buf_rj[k] + *nextai[k];
4306         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4307         bnzi += nlnk;
4308         nextrow[k]++; nextai[k]++;
4309       }
4310     }
4311     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4312 
4313     /* if free space is not available, make more free space */
4314     if (current_space->local_remaining<bnzi) {
4315       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4316       nspacedouble++;
4317     }
4318     /* copy data into free space, then initialize lnk */
4319     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4320     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4321 
4322     current_space->array           += bnzi;
4323     current_space->local_used      += bnzi;
4324     current_space->local_remaining -= bnzi;
4325 
4326     bi[i+1] = bi[i] + bnzi;
4327   }
4328 
4329   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4330 
4331   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4332   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4333   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4334 
4335   /* create symbolic parallel matrix B_mpi */
4336   /*---------------------------------------*/
4337   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4338   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4339   if (n==PETSC_DECIDE) {
4340     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4341   } else {
4342     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4343   }
4344   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4345   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4346   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4347   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4348   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4349 
4350   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4351   B_mpi->assembled    = PETSC_FALSE;
4352   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4353   merge->bi           = bi;
4354   merge->bj           = bj;
4355   merge->buf_ri       = buf_ri;
4356   merge->buf_rj       = buf_rj;
4357   merge->coi          = NULL;
4358   merge->coj          = NULL;
4359   merge->owners_co    = NULL;
4360 
4361   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4362 
4363   /* attach the supporting struct to B_mpi for reuse */
4364   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4365   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4366   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4367   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4368   *mpimat = B_mpi;
4369 
4370   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4371   PetscFunctionReturn(0);
4372 }
4373 
4374 #undef __FUNCT__
4375 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4376 /*@C
4377       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4378                  matrices from each processor
4379 
4380     Collective on MPI_Comm
4381 
4382    Input Parameters:
4383 +    comm - the communicators the parallel matrix will live on
4384 .    seqmat - the input sequential matrices
4385 .    m - number of local rows (or PETSC_DECIDE)
4386 .    n - number of local columns (or PETSC_DECIDE)
4387 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4388 
4389    Output Parameter:
4390 .    mpimat - the parallel matrix generated
4391 
4392     Level: advanced
4393 
4394    Notes:
4395      The dimensions of the sequential matrix in each processor MUST be the same.
4396      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4397      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4398 @*/
4399 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4400 {
4401   PetscErrorCode ierr;
4402   PetscMPIInt    size;
4403 
4404   PetscFunctionBegin;
4405   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4406   if (size == 1) {
4407     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4408     if (scall == MAT_INITIAL_MATRIX) {
4409       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4410     } else {
4411       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4412     }
4413     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4414     PetscFunctionReturn(0);
4415   }
4416   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4417   if (scall == MAT_INITIAL_MATRIX) {
4418     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4419   }
4420   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4421   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 #undef __FUNCT__
4426 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4427 /*@
4428      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4429           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4430           with MatGetSize()
4431 
4432     Not Collective
4433 
4434    Input Parameters:
4435 +    A - the matrix
4436 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4437 
4438    Output Parameter:
4439 .    A_loc - the local sequential matrix generated
4440 
4441     Level: developer
4442 
4443 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4444 
4445 @*/
4446 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4447 {
4448   PetscErrorCode ierr;
4449   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4450   Mat_SeqAIJ     *mat,*a,*b;
4451   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4452   MatScalar      *aa,*ba,*cam;
4453   PetscScalar    *ca;
4454   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4455   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4456   PetscBool      match;
4457   MPI_Comm       comm;
4458   PetscMPIInt    size;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4462   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4463   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4464   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4465   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4466 
4467   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4468   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4469   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4470   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4471   aa = a->a; ba = b->a;
4472   if (scall == MAT_INITIAL_MATRIX) {
4473     if (size == 1) {
4474       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4475       PetscFunctionReturn(0);
4476     }
4477 
4478     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4479     ci[0] = 0;
4480     for (i=0; i<am; i++) {
4481       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4482     }
4483     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4484     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4485     k    = 0;
4486     for (i=0; i<am; i++) {
4487       ncols_o = bi[i+1] - bi[i];
4488       ncols_d = ai[i+1] - ai[i];
4489       /* off-diagonal portion of A */
4490       for (jo=0; jo<ncols_o; jo++) {
4491         col = cmap[*bj];
4492         if (col >= cstart) break;
4493         cj[k]   = col; bj++;
4494         ca[k++] = *ba++;
4495       }
4496       /* diagonal portion of A */
4497       for (j=0; j<ncols_d; j++) {
4498         cj[k]   = cstart + *aj++;
4499         ca[k++] = *aa++;
4500       }
4501       /* off-diagonal portion of A */
4502       for (j=jo; j<ncols_o; j++) {
4503         cj[k]   = cmap[*bj++];
4504         ca[k++] = *ba++;
4505       }
4506     }
4507     /* put together the new matrix */
4508     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4509     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4510     /* Since these are PETSc arrays, change flags to free them as necessary. */
4511     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4512     mat->free_a  = PETSC_TRUE;
4513     mat->free_ij = PETSC_TRUE;
4514     mat->nonew   = 0;
4515   } else if (scall == MAT_REUSE_MATRIX) {
4516     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4517     ci = mat->i; cj = mat->j; cam = mat->a;
4518     for (i=0; i<am; i++) {
4519       /* off-diagonal portion of A */
4520       ncols_o = bi[i+1] - bi[i];
4521       for (jo=0; jo<ncols_o; jo++) {
4522         col = cmap[*bj];
4523         if (col >= cstart) break;
4524         *cam++ = *ba++; bj++;
4525       }
4526       /* diagonal portion of A */
4527       ncols_d = ai[i+1] - ai[i];
4528       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4529       /* off-diagonal portion of A */
4530       for (j=jo; j<ncols_o; j++) {
4531         *cam++ = *ba++; bj++;
4532       }
4533     }
4534   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4535   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 #undef __FUNCT__
4540 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4541 /*@C
4542      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4543 
4544     Not Collective
4545 
4546    Input Parameters:
4547 +    A - the matrix
4548 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4549 -    row, col - index sets of rows and columns to extract (or NULL)
4550 
4551    Output Parameter:
4552 .    A_loc - the local sequential matrix generated
4553 
4554     Level: developer
4555 
4556 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4557 
4558 @*/
4559 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4560 {
4561   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4562   PetscErrorCode ierr;
4563   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4564   IS             isrowa,iscola;
4565   Mat            *aloc;
4566   PetscBool      match;
4567 
4568   PetscFunctionBegin;
4569   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4570   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4571   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4572   if (!row) {
4573     start = A->rmap->rstart; end = A->rmap->rend;
4574     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4575   } else {
4576     isrowa = *row;
4577   }
4578   if (!col) {
4579     start = A->cmap->rstart;
4580     cmap  = a->garray;
4581     nzA   = a->A->cmap->n;
4582     nzB   = a->B->cmap->n;
4583     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4584     ncols = 0;
4585     for (i=0; i<nzB; i++) {
4586       if (cmap[i] < start) idx[ncols++] = cmap[i];
4587       else break;
4588     }
4589     imark = i;
4590     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4591     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4592     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4593   } else {
4594     iscola = *col;
4595   }
4596   if (scall != MAT_INITIAL_MATRIX) {
4597     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4598     aloc[0] = *A_loc;
4599   }
4600   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4601   *A_loc = aloc[0];
4602   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4603   if (!row) {
4604     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4605   }
4606   if (!col) {
4607     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4608   }
4609   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4610   PetscFunctionReturn(0);
4611 }
4612 
4613 #undef __FUNCT__
4614 #define __FUNCT__ "MatGetBrowsOfAcols"
4615 /*@C
4616     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4617 
4618     Collective on Mat
4619 
4620    Input Parameters:
4621 +    A,B - the matrices in mpiaij format
4622 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4623 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4624 
4625    Output Parameter:
4626 +    rowb, colb - index sets of rows and columns of B to extract
4627 -    B_seq - the sequential matrix generated
4628 
4629     Level: developer
4630 
4631 @*/
4632 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4633 {
4634   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4635   PetscErrorCode ierr;
4636   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4637   IS             isrowb,iscolb;
4638   Mat            *bseq=NULL;
4639 
4640   PetscFunctionBegin;
4641   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4642     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4643   }
4644   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4645 
4646   if (scall == MAT_INITIAL_MATRIX) {
4647     start = A->cmap->rstart;
4648     cmap  = a->garray;
4649     nzA   = a->A->cmap->n;
4650     nzB   = a->B->cmap->n;
4651     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4652     ncols = 0;
4653     for (i=0; i<nzB; i++) {  /* row < local row index */
4654       if (cmap[i] < start) idx[ncols++] = cmap[i];
4655       else break;
4656     }
4657     imark = i;
4658     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4659     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4660     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4661     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4662   } else {
4663     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4664     isrowb  = *rowb; iscolb = *colb;
4665     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4666     bseq[0] = *B_seq;
4667   }
4668   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4669   *B_seq = bseq[0];
4670   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4671   if (!rowb) {
4672     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4673   } else {
4674     *rowb = isrowb;
4675   }
4676   if (!colb) {
4677     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4678   } else {
4679     *colb = iscolb;
4680   }
4681   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4682   PetscFunctionReturn(0);
4683 }
4684 
4685 #undef __FUNCT__
4686 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4687 /*
4688     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4689     of the OFF-DIAGONAL portion of local A
4690 
4691     Collective on Mat
4692 
4693    Input Parameters:
4694 +    A,B - the matrices in mpiaij format
4695 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4696 
4697    Output Parameter:
4698 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4699 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4700 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4701 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4702 
4703     Level: developer
4704 
4705 */
4706 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4707 {
4708   VecScatter_MPI_General *gen_to,*gen_from;
4709   PetscErrorCode         ierr;
4710   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4711   Mat_SeqAIJ             *b_oth;
4712   VecScatter             ctx =a->Mvctx;
4713   MPI_Comm               comm;
4714   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4715   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4716   PetscScalar            *rvalues,*svalues;
4717   MatScalar              *b_otha,*bufa,*bufA;
4718   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4719   MPI_Request            *rwaits = NULL,*swaits = NULL;
4720   MPI_Status             *sstatus,rstatus;
4721   PetscMPIInt            jj,size;
4722   PetscInt               *cols,sbs,rbs;
4723   PetscScalar            *vals;
4724 
4725   PetscFunctionBegin;
4726   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4727   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4728 
4729   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4730     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4731   }
4732   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4733   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4734 
4735   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4736   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4737   rvalues  = gen_from->values; /* holds the length of receiving row */
4738   svalues  = gen_to->values;   /* holds the length of sending row */
4739   nrecvs   = gen_from->n;
4740   nsends   = gen_to->n;
4741 
4742   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4743   srow    = gen_to->indices;    /* local row index to be sent */
4744   sstarts = gen_to->starts;
4745   sprocs  = gen_to->procs;
4746   sstatus = gen_to->sstatus;
4747   sbs     = gen_to->bs;
4748   rstarts = gen_from->starts;
4749   rprocs  = gen_from->procs;
4750   rbs     = gen_from->bs;
4751 
4752   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4753   if (scall == MAT_INITIAL_MATRIX) {
4754     /* i-array */
4755     /*---------*/
4756     /*  post receives */
4757     for (i=0; i<nrecvs; i++) {
4758       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4759       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4760       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4761     }
4762 
4763     /* pack the outgoing message */
4764     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4765 
4766     sstartsj[0] = 0;
4767     rstartsj[0] = 0;
4768     len         = 0; /* total length of j or a array to be sent */
4769     k           = 0;
4770     for (i=0; i<nsends; i++) {
4771       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4772       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4773       for (j=0; j<nrows; j++) {
4774         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4775         for (l=0; l<sbs; l++) {
4776           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4777 
4778           rowlen[j*sbs+l] = ncols;
4779 
4780           len += ncols;
4781           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4782         }
4783         k++;
4784       }
4785       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4786 
4787       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4788     }
4789     /* recvs and sends of i-array are completed */
4790     i = nrecvs;
4791     while (i--) {
4792       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4793     }
4794     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4795 
4796     /* allocate buffers for sending j and a arrays */
4797     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4798     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4799 
4800     /* create i-array of B_oth */
4801     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4802 
4803     b_othi[0] = 0;
4804     len       = 0; /* total length of j or a array to be received */
4805     k         = 0;
4806     for (i=0; i<nrecvs; i++) {
4807       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4808       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4809       for (j=0; j<nrows; j++) {
4810         b_othi[k+1] = b_othi[k] + rowlen[j];
4811         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4812         k++;
4813       }
4814       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4815     }
4816 
4817     /* allocate space for j and a arrrays of B_oth */
4818     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4819     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4820 
4821     /* j-array */
4822     /*---------*/
4823     /*  post receives of j-array */
4824     for (i=0; i<nrecvs; i++) {
4825       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4826       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4827     }
4828 
4829     /* pack the outgoing message j-array */
4830     k = 0;
4831     for (i=0; i<nsends; i++) {
4832       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4833       bufJ  = bufj+sstartsj[i];
4834       for (j=0; j<nrows; j++) {
4835         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4836         for (ll=0; ll<sbs; ll++) {
4837           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4838           for (l=0; l<ncols; l++) {
4839             *bufJ++ = cols[l];
4840           }
4841           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4842         }
4843       }
4844       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4845     }
4846 
4847     /* recvs and sends of j-array are completed */
4848     i = nrecvs;
4849     while (i--) {
4850       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4851     }
4852     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4853   } else if (scall == MAT_REUSE_MATRIX) {
4854     sstartsj = *startsj_s;
4855     rstartsj = *startsj_r;
4856     bufa     = *bufa_ptr;
4857     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4858     b_otha   = b_oth->a;
4859   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4860 
4861   /* a-array */
4862   /*---------*/
4863   /*  post receives of a-array */
4864   for (i=0; i<nrecvs; i++) {
4865     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4866     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4867   }
4868 
4869   /* pack the outgoing message a-array */
4870   k = 0;
4871   for (i=0; i<nsends; i++) {
4872     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4873     bufA  = bufa+sstartsj[i];
4874     for (j=0; j<nrows; j++) {
4875       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4876       for (ll=0; ll<sbs; ll++) {
4877         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4878         for (l=0; l<ncols; l++) {
4879           *bufA++ = vals[l];
4880         }
4881         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4882       }
4883     }
4884     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4885   }
4886   /* recvs and sends of a-array are completed */
4887   i = nrecvs;
4888   while (i--) {
4889     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4890   }
4891   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4892   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4893 
4894   if (scall == MAT_INITIAL_MATRIX) {
4895     /* put together the new matrix */
4896     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4897 
4898     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4899     /* Since these are PETSc arrays, change flags to free them as necessary. */
4900     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4901     b_oth->free_a  = PETSC_TRUE;
4902     b_oth->free_ij = PETSC_TRUE;
4903     b_oth->nonew   = 0;
4904 
4905     ierr = PetscFree(bufj);CHKERRQ(ierr);
4906     if (!startsj_s || !bufa_ptr) {
4907       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4908       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4909     } else {
4910       *startsj_s = sstartsj;
4911       *startsj_r = rstartsj;
4912       *bufa_ptr  = bufa;
4913     }
4914   }
4915   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 #undef __FUNCT__
4920 #define __FUNCT__ "MatGetCommunicationStructs"
4921 /*@C
4922   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4923 
4924   Not Collective
4925 
4926   Input Parameters:
4927 . A - The matrix in mpiaij format
4928 
4929   Output Parameter:
4930 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4931 . colmap - A map from global column index to local index into lvec
4932 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4933 
4934   Level: developer
4935 
4936 @*/
4937 #if defined(PETSC_USE_CTABLE)
4938 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4939 #else
4940 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4941 #endif
4942 {
4943   Mat_MPIAIJ *a;
4944 
4945   PetscFunctionBegin;
4946   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4947   PetscValidPointer(lvec, 2);
4948   PetscValidPointer(colmap, 3);
4949   PetscValidPointer(multScatter, 4);
4950   a = (Mat_MPIAIJ*) A->data;
4951   if (lvec) *lvec = a->lvec;
4952   if (colmap) *colmap = a->colmap;
4953   if (multScatter) *multScatter = a->Mvctx;
4954   PetscFunctionReturn(0);
4955 }
4956 
4957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4959 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4960 #if defined(PETSC_HAVE_ELEMENTAL)
4961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4962 #endif
4963 
4964 #undef __FUNCT__
4965 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4966 /*
4967     Computes (B'*A')' since computing B*A directly is untenable
4968 
4969                n                       p                          p
4970         (              )       (              )         (                  )
4971       m (      A       )  *  n (       B      )   =   m (         C        )
4972         (              )       (              )         (                  )
4973 
4974 */
4975 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4976 {
4977   PetscErrorCode ierr;
4978   Mat            At,Bt,Ct;
4979 
4980   PetscFunctionBegin;
4981   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4982   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4983   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4984   ierr = MatDestroy(&At);CHKERRQ(ierr);
4985   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4986   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4987   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 #undef __FUNCT__
4992 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4993 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4994 {
4995   PetscErrorCode ierr;
4996   PetscInt       m=A->rmap->n,n=B->cmap->n;
4997   Mat            Cmat;
4998 
4999   PetscFunctionBegin;
5000   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5001   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5002   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5003   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5004   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5005   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5006   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5007   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5008 
5009   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5010 
5011   *C = Cmat;
5012   PetscFunctionReturn(0);
5013 }
5014 
5015 /* ----------------------------------------------------------------*/
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5018 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5019 {
5020   PetscErrorCode ierr;
5021 
5022   PetscFunctionBegin;
5023   if (scall == MAT_INITIAL_MATRIX) {
5024     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5025     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5026     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5027   }
5028   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5029   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5030   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5031   PetscFunctionReturn(0);
5032 }
5033 
5034 /*MC
5035    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5036 
5037    Options Database Keys:
5038 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5039 
5040   Level: beginner
5041 
5042 .seealso: MatCreateAIJ()
5043 M*/
5044 
5045 #undef __FUNCT__
5046 #define __FUNCT__ "MatCreate_MPIAIJ"
5047 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5048 {
5049   Mat_MPIAIJ     *b;
5050   PetscErrorCode ierr;
5051   PetscMPIInt    size;
5052 
5053   PetscFunctionBegin;
5054   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5055 
5056   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5057   B->data       = (void*)b;
5058   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5059   B->assembled  = PETSC_FALSE;
5060   B->insertmode = NOT_SET_VALUES;
5061   b->size       = size;
5062 
5063   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5064 
5065   /* build cache for off array entries formed */
5066   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5067 
5068   b->donotstash  = PETSC_FALSE;
5069   b->colmap      = 0;
5070   b->garray      = 0;
5071   b->roworiented = PETSC_TRUE;
5072 
5073   /* stuff used for matrix vector multiply */
5074   b->lvec  = NULL;
5075   b->Mvctx = NULL;
5076 
5077   /* stuff for MatGetRow() */
5078   b->rowindices   = 0;
5079   b->rowvalues    = 0;
5080   b->getrowactive = PETSC_FALSE;
5081 
5082   /* flexible pointer used in CUSP/CUSPARSE classes */
5083   b->spptr = NULL;
5084 
5085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5096 #if defined(PETSC_HAVE_ELEMENTAL)
5097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5098 #endif
5099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5103   PetscFunctionReturn(0);
5104 }
5105 
5106 #undef __FUNCT__
5107 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5108 /*@
5109      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5110          and "off-diagonal" part of the matrix in CSR format.
5111 
5112    Collective on MPI_Comm
5113 
5114    Input Parameters:
5115 +  comm - MPI communicator
5116 .  m - number of local rows (Cannot be PETSC_DECIDE)
5117 .  n - This value should be the same as the local size used in creating the
5118        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5119        calculated if N is given) For square matrices n is almost always m.
5120 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5121 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5122 .   i - row indices for "diagonal" portion of matrix
5123 .   j - column indices
5124 .   a - matrix values
5125 .   oi - row indices for "off-diagonal" portion of matrix
5126 .   oj - column indices
5127 -   oa - matrix values
5128 
5129    Output Parameter:
5130 .   mat - the matrix
5131 
5132    Level: advanced
5133 
5134    Notes:
5135        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5136        must free the arrays once the matrix has been destroyed and not before.
5137 
5138        The i and j indices are 0 based
5139 
5140        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5141 
5142        This sets local rows and cannot be used to set off-processor values.
5143 
5144        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5145        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5146        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5147        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5148        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5149        communication if it is known that only local entries will be set.
5150 
5151 .keywords: matrix, aij, compressed row, sparse, parallel
5152 
5153 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5154           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5155 @*/
5156 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5157 {
5158   PetscErrorCode ierr;
5159   Mat_MPIAIJ     *maij;
5160 
5161   PetscFunctionBegin;
5162   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5163   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5164   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5165   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5166   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5167   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5168   maij = (Mat_MPIAIJ*) (*mat)->data;
5169 
5170   (*mat)->preallocated = PETSC_TRUE;
5171 
5172   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5173   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5174 
5175   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5176   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5177 
5178   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5179   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5180   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5181   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5182 
5183   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5184   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5185   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5186   PetscFunctionReturn(0);
5187 }
5188 
5189 /*
5190     Special version for direct calls from Fortran
5191 */
5192 #include <petsc/private/fortranimpl.h>
5193 
5194 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5195 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5196 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5197 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5198 #endif
5199 
5200 /* Redefine the error-checking macros so they can be used inside a function that returns void */
5201 #undef CHKERRQ
5202 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5203 #undef SETERRQ2
5204 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5205 #undef SETERRQ3
5206 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5207 #undef SETERRQ
5208 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5209 
5210 #undef __FUNCT__
5211 #define __FUNCT__ "matsetvaluesmpiaij_"
5212 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5213 {
5214   Mat            mat  = *mmat;
5215   PetscInt       m    = *mm, n = *mn;
5216   InsertMode     addv = *maddv;
5217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5218   PetscScalar    value;
5219   PetscErrorCode ierr;
5220 
5221   MatCheckPreallocated(mat,1);
5222   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5223 
5224 #if defined(PETSC_USE_DEBUG)
5225   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5226 #endif
5227   {
5228     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5229     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5230     PetscBool roworiented = aij->roworiented;
5231 
5232     /* Some Variables required in the macro */
5233     Mat        A                 = aij->A;
5234     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5235     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5236     MatScalar  *aa               = a->a;
5237     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5238     Mat        B                 = aij->B;
5239     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5240     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5241     MatScalar  *ba               = b->a;
5242 
5243     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5244     PetscInt  nonew = a->nonew;
5245     MatScalar *ap1,*ap2;
5246 
5247     PetscFunctionBegin;
5248     for (i=0; i<m; i++) {
5249       if (im[i] < 0) continue;
5250 #if defined(PETSC_USE_DEBUG)
5251       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5252 #endif
5253       if (im[i] >= rstart && im[i] < rend) {
5254         row      = im[i] - rstart;
5255         lastcol1 = -1;
5256         rp1      = aj + ai[row];
5257         ap1      = aa + ai[row];
5258         rmax1    = aimax[row];
5259         nrow1    = ailen[row];
5260         low1     = 0;
5261         high1    = nrow1;
5262         lastcol2 = -1;
5263         rp2      = bj + bi[row];
5264         ap2      = ba + bi[row];
5265         rmax2    = bimax[row];
5266         nrow2    = bilen[row];
5267         low2     = 0;
5268         high2    = nrow2;
5269 
5270         for (j=0; j<n; j++) {
5271           if (roworiented) value = v[i*n+j];
5272           else value = v[i+j*m];
5273           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5274           if (in[j] >= cstart && in[j] < cend) {
5275             col = in[j] - cstart;
5276             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5277           } else if (in[j] < 0) continue;
5278 #if defined(PETSC_USE_DEBUG)
5279           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5280 #endif
5281           else {
5282             if (mat->was_assembled) {
5283               if (!aij->colmap) {
5284                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5285               }
5286 #if defined(PETSC_USE_CTABLE)
5287               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5288               col--;
5289 #else
5290               col = aij->colmap[in[j]] - 1;
5291 #endif
5292               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5293                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5294                 col  =  in[j];
5295                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5296                 B     = aij->B;
5297                 b     = (Mat_SeqAIJ*)B->data;
5298                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5299                 rp2   = bj + bi[row];
5300                 ap2   = ba + bi[row];
5301                 rmax2 = bimax[row];
5302                 nrow2 = bilen[row];
5303                 low2  = 0;
5304                 high2 = nrow2;
5305                 bm    = aij->B->rmap->n;
5306                 ba    = b->a;
5307               }
5308             } else col = in[j];
5309             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5310           }
5311         }
5312       } else if (!aij->donotstash) {
5313         if (roworiented) {
5314           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5315         } else {
5316           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5317         }
5318       }
5319     }
5320   }
5321   PetscFunctionReturnVoid();
5322 }
5323 
5324