xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bd030d652f5d3cd4109cb5ea0f396aec124820bc)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
16 
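   For example, the following minimal sketch creates an AIJ matrix and calls both preallocation
   routines as recommended above; comm, M, N and the per-row counts 5 and 2 are placeholders for
   your own communicator, global sizes, and nonzero estimates:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);          used when comm has a single process
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   used when comm has multiple processes
   ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve
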
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
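/*
   A call sketch (illustrative only, not taken from an actual caller): m is the number of rows this
   process is to own, the m values across comm must sum to the global row count of gmat, and the
   entries of gmat are taken from process 0.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
         ... change the numerical values of gmat on process 0 ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/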
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine number diagonal and off-diagonal counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal nonzeros in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each processor
402 stores an order-N integer array) but is fast to access.
403 */
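/*
   Looking up a global column number gcol with the resulting map, as a sketch that mirrors the use in
   MatSetValues_MPIAIJ() below (gcol and lcol are hypothetical names; lcol comes back negative when
   gcol does not occur in the off-diagonal part):

   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/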
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
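  /* the values v are expected to be ordered by increasing global column number: first the entries of
     the off-diagonal (B) part whose columns lie to the left of the diagonal block, then the diagonal
     (A) block entries, then the remaining off-diagonal entries to the right */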
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so, we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if the nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled, thus we can skip this step
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); /* start gathering the off-process (ghost) entries of xx into lvec */
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);                                      /* diagonal block product, overlapped with the communication */
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);    /* finish gathering the ghost entries */
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);                           /* add the off-diagonal block times the ghost entries */
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1154 #if defined(PETSC_HAVE_ELEMENTAL)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159 #endif
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1165 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1168   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1169   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1170   PetscErrorCode ierr;
1171   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1172   int            fd;
1173   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1174   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1175   PetscScalar    *column_values;
1176   PetscInt       message_count,flowcontrolcount;
1177   FILE           *file;
1178 
1179   PetscFunctionBegin;
1180   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1182   nz   = A->nz + B->nz;
1183   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1184   if (!rank) {
1185     header[0] = MAT_FILE_CLASSID;
1186     header[1] = mat->rmap->N;
1187     header[2] = mat->cmap->N;
1188 
1189     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     /* get largest number of rows any processor has */
1192     rlen  = mat->rmap->n;
1193     range = mat->rmap->range;
1194     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1195   } else {
1196     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197     rlen = mat->rmap->n;
1198   }
1199 
1200   /* load up the local row counts */
1201   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1202   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1203 
1204   /* store the row lengths to the file */
1205   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1206   if (!rank) {
1207     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1208     for (i=1; i<size; i++) {
1209       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1210       rlen = range[i+1] - range[i];
1211       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     }
1214     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1215   } else {
1216     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1217     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1219   }
1220   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1221 
1222   /* load up the local column indices */
1223   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
1224   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1226   cnt   = 0;
1227   for (i=0; i<mat->rmap->n; i++) {
1228     for (j=B->i[i]; j<B->i[i+1]; j++) {
1229       if ((col = garray[B->j[j]]) > cstart) break;
1230       column_indices[cnt++] = col;
1231     }
1232     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1233     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1234   }
1235   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1236 
1237   /* store the column indices to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     MPI_Status status;
1241     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1244       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1245       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1246       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1248     }
1249     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1250   } else {
1251     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1252     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1255   }
1256   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1257 
1258   /* load up the local column values */
1259   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1260   cnt  = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if (garray[B->j[j]] > cstart) break;
1264       column_values[cnt++] = B->a[j];
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1267     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1268   }
1269   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1270 
1271   /* store the column values to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1280       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(column_values);CHKERRQ(ierr);
1291 
1292   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1293   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #include <petscdraw.h>
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1300 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1301 {
1302   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1303   PetscErrorCode    ierr;
1304   PetscMPIInt       rank = aij->rank,size = aij->size;
1305   PetscBool         isdraw,iascii,isbinary;
1306   PetscViewer       sviewer;
1307   PetscViewerFormat format;
1308 
1309   PetscFunctionBegin;
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1313   if (iascii) {
1314     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1315     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1316       MatInfo   info;
1317       PetscInt  *inodes;
1318 
1319       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1320       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1321       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1323       if (!inodes) {
1324         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1325                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1326       } else {
1327         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1328                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1329       }
1330       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1334       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1335       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1337       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1338       PetscFunctionReturn(0);
1339     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1340       PetscInt inodecount,inodelimit,*inodes;
1341       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1342       if (inodes) {
1343         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1344       } else {
1345         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1346       }
1347       PetscFunctionReturn(0);
1348     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1349       PetscFunctionReturn(0);
1350     }
1351   } else if (isbinary) {
1352     if (size == 1) {
1353       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1354       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1355     } else {
1356       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1357     }
1358     PetscFunctionReturn(0);
1359   } else if (isdraw) {
1360     PetscDraw draw;
1361     PetscBool isnull;
1362     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1363     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1364     if (isnull) PetscFunctionReturn(0);
1365   }
1366 
1367   {
1368     /* assemble the entire matrix onto the first process */
1369     Mat        A;
1370     Mat_SeqAIJ *Aloc;
1371     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1372     MatScalar  *a;
1373 
1374     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1375     if (!rank) {
1376       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1379     }
1380     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1381     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1382     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1383     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1384     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1385 
1386     /* copy over the A part */
1387     Aloc = (Mat_SeqAIJ*)aij->A->data;
1388     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1389     row  = mat->rmap->rstart;
1390     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1391     for (i=0; i<m; i++) {
1392       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1393       row++;
1394       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1395     }
1396     aj = Aloc->j;
1397     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1398 
1399     /* copy over the B part */
1400     Aloc = (Mat_SeqAIJ*)aij->B->data;
1401     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402     row  = mat->rmap->rstart;
1403     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1404     ct   = cols;
1405     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1406     for (i=0; i<m; i++) {
1407       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1408       row++;
1409       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1410     }
1411     ierr = PetscFree(ct);CHKERRQ(ierr);
1412     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414     /*
1415        Every process has to participate in the viewing calls below since the graphics waits
1416        are synchronized across all processes that share the PetscDraw object
1417     */
1418     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1419     if (!rank) {
1420       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1421       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1422     }
1423     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1424     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1425     ierr = MatDestroy(&A);CHKERRQ(ierr);
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatView_MPIAIJ"
1432 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1433 {
1434   PetscErrorCode ierr;
1435   PetscBool      iascii,isdraw,issocket,isbinary;
1436 
1437   PetscFunctionBegin;
1438   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1439   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1440   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1442   if (iascii || isdraw || isbinary || issocket) {
1443     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1444   }
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSOR_MPIAIJ"
1450 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1451 {
1452   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1453   PetscErrorCode ierr;
1454   Vec            bb1 = 0;
1455   PetscBool      hasop;
1456 
1457   PetscFunctionBegin;
1458   if (flag == SOR_APPLY_UPPER) {
1459     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460     PetscFunctionReturn(0);
1461   }
1462 
1463   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1464     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1465   }
1466 
1467   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1468     if (flag & SOR_ZERO_INITIAL_GUESS) {
1469       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1470       its--;
1471     }
1472 
1473     while (its--) {
1474       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476 
1477       /* update rhs: bb1 = bb - B*x */
1478       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1479       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1480 
1481       /* local sweep */
1482       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1483     }
1484   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1485     if (flag & SOR_ZERO_INITIAL_GUESS) {
1486       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487       its--;
1488     }
1489     while (its--) {
1490       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492 
1493       /* update rhs: bb1 = bb - B*x */
1494       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1495       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1496 
1497       /* local sweep */
1498       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1499     }
1500   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1501     if (flag & SOR_ZERO_INITIAL_GUESS) {
1502       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1503       its--;
1504     }
1505     while (its--) {
1506       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508 
1509       /* update rhs: bb1 = bb - B*x */
1510       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1511       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1512 
1513       /* local sweep */
1514       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1515     }
1516   } else if (flag & SOR_EISENSTAT) {
1517     Vec xx1;
1518 
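    /* Eisenstat trick: a backward local sweep produces xx; the forward-sweep right-hand side is then
       built as bb1 = bb + ((omega-2)/omega)*D*xx + B*xoff, and the forward-sweep result is added to xx */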
1519     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1520     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1521 
1522     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524     if (!mat->diag) {
1525       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1526       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1527     }
1528     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1529     if (hasop) {
1530       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1531     } else {
1532       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1533     }
1534     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1535 
1536     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1537 
1538     /* local sweep */
1539     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1540     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1541     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1542   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1543 
1544   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1545 
1546   matin->factorerrortype = mat->A->factorerrortype;
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatPermute_MPIAIJ"
1552 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1553 {
1554   Mat            aA,aB,Aperm;
1555   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1556   PetscScalar    *aa,*ba;
1557   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1558   PetscSF        rowsf,sf;
1559   IS             parcolp = NULL;
1560   PetscBool      done;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1565   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1566   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1567   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1568 
1569   /* Invert row permutation to find out where my rows should go */
1570   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1571   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1572   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1573   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1574   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1576 
1577   /* Invert column permutation to find out where my columns should go */
1578   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1579   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1581   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1582   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1583   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1585 
1586   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1587   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1588   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1589 
1590   /* Find out where my gcols should go */
1591   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1592   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1593   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1594   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1595   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1596   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1597   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1598   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1599 
1600   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1601   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1602   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
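  /* count, for each local row, how many entries of the permuted matrix fall in the diagonal versus the
     off-diagonal block (by comparing the owner of the destination row with the owner of each destination
     column), then communicate the counts so every process can preallocate its rows of the permuted matrix */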
1603   for (i=0; i<m; i++) {
1604     PetscInt row = rdest[i],rowner;
1605     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1606     for (j=ai[i]; j<ai[i+1]; j++) {
1607       PetscInt cowner,col = cdest[aj[j]];
1608       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1609       if (rowner == cowner) dnnz[i]++;
1610       else onnz[i]++;
1611     }
1612     for (j=bi[i]; j<bi[i+1]; j++) {
1613       PetscInt cowner,col = gcdest[bj[j]];
1614       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1615       if (rowner == cowner) dnnz[i]++;
1616       else onnz[i]++;
1617     }
1618   }
1619   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1620   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1624 
1625   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1626   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1627   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1630     PetscInt j0,rowlen;
1631     rowlen = ai[i+1] - ai[i];
1632     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the scratch-array length), so insert in batches */
1633       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1634       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1635     }
1636     rowlen = bi[i+1] - bi[i];
1637     for (j0=j=0; j<rowlen; j0=j) {
1638       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1639       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1640     }
1641   }
1642   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1645   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1646   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1647   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1648   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1650   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1651   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1652   *B = Aperm;
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1658 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1659 {
1660   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1661   PetscErrorCode ierr;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1665   if (ghosts) *ghosts = aij->garray;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1671 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1672 {
1673   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1674   Mat            A    = mat->A,B = mat->B;
1675   PetscErrorCode ierr;
1676   PetscReal      isend[5],irecv[5];
1677 
1678   PetscFunctionBegin;
1679   info->block_size = 1.0;
1680   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1681 
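  /* accumulate local statistics from both the diagonal (A) and off-diagonal (B) blocks before reducing */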
1682   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1683   isend[3] = info->memory;  isend[4] = info->mallocs;
1684 
1685   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1686 
1687   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1688   isend[3] += info->memory;  isend[4] += info->mallocs;
1689   if (flag == MAT_LOCAL) {
1690     info->nz_used      = isend[0];
1691     info->nz_allocated = isend[1];
1692     info->nz_unneeded  = isend[2];
1693     info->memory       = isend[3];
1694     info->mallocs      = isend[4];
1695   } else if (flag == MAT_GLOBAL_MAX) {
1696     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1697 
1698     info->nz_used      = irecv[0];
1699     info->nz_allocated = irecv[1];
1700     info->nz_unneeded  = irecv[2];
1701     info->memory       = irecv[3];
1702     info->mallocs      = irecv[4];
1703   } else if (flag == MAT_GLOBAL_SUM) {
1704     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1705 
1706     info->nz_used      = irecv[0];
1707     info->nz_allocated = irecv[1];
1708     info->nz_unneeded  = irecv[2];
1709     info->memory       = irecv[3];
1710     info->mallocs      = irecv[4];
1711   }
1712   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1713   info->fill_ratio_needed = 0;
1714   info->factor_mallocs    = 0;
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatSetOption_MPIAIJ"
1720 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1721 {
1722   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1723   PetscErrorCode ierr;
1724 
1725   PetscFunctionBegin;
1726   switch (op) {
1727   case MAT_NEW_NONZERO_LOCATIONS:
1728   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1729   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1730   case MAT_KEEP_NONZERO_PATTERN:
1731   case MAT_NEW_NONZERO_LOCATION_ERR:
1732   case MAT_USE_INODES:
1733   case MAT_IGNORE_ZERO_ENTRIES:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1737     break;
1738   case MAT_ROW_ORIENTED:
1739     MatCheckPreallocated(A,1);
1740     a->roworiented = flg;
1741 
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_NEW_DIAGONALS:
1746     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1747     break;
1748   case MAT_IGNORE_OFF_PROC_ENTRIES:
1749     a->donotstash = flg;
1750     break;
1751   case MAT_SPD:
1752     A->spd_set = PETSC_TRUE;
1753     A->spd     = flg;
1754     if (flg) {
1755       A->symmetric                  = PETSC_TRUE;
1756       A->structurally_symmetric     = PETSC_TRUE;
1757       A->symmetric_set              = PETSC_TRUE;
1758       A->structurally_symmetric_set = PETSC_TRUE;
1759     }
1760     break;
1761   case MAT_SYMMETRIC:
1762     MatCheckPreallocated(A,1);
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_STRUCTURALLY_SYMMETRIC:
1766     MatCheckPreallocated(A,1);
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     break;
1769   case MAT_HERMITIAN:
1770     MatCheckPreallocated(A,1);
1771     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_SYMMETRY_ETERNAL:
1774     MatCheckPreallocated(A,1);
1775     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1776     break;
1777   default:
1778     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1779   }
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatGetRow_MPIAIJ"
1785 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1786 {
1787   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1788   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1789   PetscErrorCode ierr;
1790   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1791   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1792   PetscInt       *cmap,*idx_p;
1793 
1794   PetscFunctionBegin;
1795   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1796   mat->getrowactive = PETSC_TRUE;
1797 
1798   if (!mat->rowvalues && (idx || v)) {
1799     /*
1800         allocate enough space to hold information from the longest row.
1801     */
1802     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1803     PetscInt   max = 1,tmp;
1804     for (i=0; i<matin->rmap->n; i++) {
1805       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1806       if (max < tmp) max = tmp;
1807     }
1808     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1809   }
1810 
1811   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1812   lrow = row - rstart;
1813 
1814   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1815   if (!v)   {pvA = 0; pvB = 0;}
1816   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1817   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1818   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1819   nztot = nzA + nzB;
1820 
1821   cmap = mat->garray;
1822   if (v  || idx) {
1823     if (nztot) {
1824       /* Sort by increasing column numbers, assuming A and B already sorted */
1825       PetscInt imark = -1;
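      /* imark = number of off-diagonal (B) entries whose global column lies before the diagonal block;
         those entries come first in the merged row */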
1826       if (v) {
1827         *v = v_p = mat->rowvalues;
1828         for (i=0; i<nzB; i++) {
1829           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1830           else break;
1831         }
1832         imark = i;
1833         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1834         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1835       }
1836       if (idx) {
1837         *idx = idx_p = mat->rowindices;
1838         if (imark > -1) {
1839           for (i=0; i<imark; i++) {
1840             idx_p[i] = cmap[cworkB[i]];
1841           }
1842         } else {
1843           for (i=0; i<nzB; i++) {
1844             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1845             else break;
1846           }
1847           imark = i;
1848         }
1849         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1850         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1851       }
1852     } else {
1853       if (idx) *idx = 0;
1854       if (v)   *v   = 0;
1855     }
1856   }
1857   *nz  = nztot;
1858   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1859   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1865 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1866 {
1867   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1868 
1869   PetscFunctionBegin;
1870   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1871   aij->getrowactive = PETSC_FALSE;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatNorm_MPIAIJ"
1877 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1878 {
1879   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1880   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1881   PetscErrorCode ierr;
1882   PetscInt       i,j,cstart = mat->cmap->rstart;
1883   PetscReal      sum = 0.0;
1884   MatScalar      *v;
1885 
1886   PetscFunctionBegin;
1887   if (aij->size == 1) {
1888     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1889   } else {
1890     if (type == NORM_FROBENIUS) {
1891       v = amat->a;
1892       for (i=0; i<amat->nz; i++) {
1893         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1894       }
1895       v = bmat->a;
1896       for (i=0; i<bmat->nz; i++) {
1897         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1898       }
1899       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1900       *norm = PetscSqrtReal(*norm);
1901       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1902     } else if (type == NORM_1) { /* max column norm */
1903       PetscReal *tmp,*tmp2;
1904       PetscInt  *jj,*garray = aij->garray;
1905       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1906       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1907       *norm = 0.0;
1908       v     = amat->a; jj = amat->j;
1909       for (j=0; j<amat->nz; j++) {
1910         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1911       }
1912       v = bmat->a; jj = bmat->j;
1913       for (j=0; j<bmat->nz; j++) {
1914         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1915       }
1916       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1917       for (j=0; j<mat->cmap->N; j++) {
1918         if (tmp2[j] > *norm) *norm = tmp2[j];
1919       }
1920       ierr = PetscFree(tmp);CHKERRQ(ierr);
1921       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1922       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1923     } else if (type == NORM_INFINITY) { /* max row norm */
1924       PetscReal ntemp = 0.0;
1925       for (j=0; j<aij->A->rmap->n; j++) {
1926         v   = amat->a + amat->i[j];
1927         sum = 0.0;
1928         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1929           sum += PetscAbsScalar(*v); v++;
1930         }
1931         v = bmat->a + bmat->i[j];
1932         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1933           sum += PetscAbsScalar(*v); v++;
1934         }
1935         if (sum > ntemp) ntemp = sum;
1936       }
1937       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1938       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1939     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1940   }
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatTranspose_MPIAIJ"
1946 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1947 {
1948   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1949   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1950   PetscErrorCode ierr;
1951   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1952   PetscInt       cstart = A->cmap->rstart,ncol;
1953   Mat            B;
1954   MatScalar      *array;
1955 
1956   PetscFunctionBegin;
1957   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1958 
1959   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1960   ai = Aloc->i; aj = Aloc->j;
1961   bi = Bloc->i; bj = Bloc->j;
1962   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1963     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1964     PetscSFNode          *oloc;
1965     PETSC_UNUSED PetscSF sf;
1966 
1967     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1968     /* compute d_nnz for preallocation */
1969     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1970     for (i=0; i<ai[ma]; i++) {
1971       d_nnz[aj[i]]++;
1972       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1973     }
1974     /* compute local off-diagonal contributions */
1975     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1976     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1977     /* map those to global */
1978     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1979     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1980     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1981     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1982     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1983     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1984     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1985 
1986     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1987     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1988     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1989     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1990     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1991     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1992   } else {
1993     B    = *matout;
1994     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1995     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1996   }
1997 
1998   /* copy over the A part */
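  /* note the swapped row/column arguments to MatSetValues(): each local row of A is inserted as a column of B */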
1999   array = Aloc->a;
2000   row   = A->rmap->rstart;
2001   for (i=0; i<ma; i++) {
2002     ncol = ai[i+1]-ai[i];
2003     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2004     row++;
2005     array += ncol; aj += ncol;
2006   }
2007   aj = Aloc->j;
2008   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2009 
2010   /* copy over the B part */
2011   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2012   array = Bloc->a;
2013   row   = A->rmap->rstart;
2014   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2015   cols_tmp = cols;
2016   for (i=0; i<mb; i++) {
2017     ncol = bi[i+1]-bi[i];
2018     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2019     row++;
2020     array += ncol; cols_tmp += ncol;
2021   }
2022   ierr = PetscFree(cols);CHKERRQ(ierr);
2023 
2024   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2026   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2027     *matout = B;
2028   } else {
2029     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2030   }
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2036 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2037 {
2038   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2039   Mat            a    = aij->A,b = aij->B;
2040   PetscErrorCode ierr;
2041   PetscInt       s1,s2,s3;
2042 
2043   PetscFunctionBegin;
2044   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2045   if (rr) {
2046     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2047     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2048     /* Overlap communication with computation. */
2049     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2050   }
2051   if (ll) {
2052     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2053     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2054     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2055   }
2056   /* scale the diagonal block */
2057   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2058 
2059   if (rr) {
2060     /* Do a scatter end and then right scale the off-diagonal block */
2061     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2062     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2063   }
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2069 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2070 {
2071   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2072   PetscErrorCode ierr;
2073 
2074   PetscFunctionBegin;
2075   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatEqual_MPIAIJ"
2081 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2082 {
2083   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2084   Mat            a,b,c,d;
2085   PetscBool      flg;
2086   PetscErrorCode ierr;
2087 
2088   PetscFunctionBegin;
2089   a = matA->A; b = matA->B;
2090   c = matB->A; d = matB->B;
2091 
2092   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2093   if (flg) {
2094     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2095   }
2096   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 #undef __FUNCT__
2101 #define __FUNCT__ "MatCopy_MPIAIJ"
2102 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2103 {
2104   PetscErrorCode ierr;
2105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2106   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2107 
2108   PetscFunctionBegin;
2109   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2110   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2111     /* because of the column compression in the off-process part of the matrix a->B,
2112        the number of columns in a->B and b->B may be different, hence we cannot call
2113        MatCopy() directly on the two parts. If need be, we could provide a copy more
2114        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2115        then copying the submatrices */
2116     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2117   } else {
2118     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2119     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatSetUp_MPIAIJ"
2126 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2127 {
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 /*
2136    Computes the number of nonzeros per row needed for preallocation when X and Y
2137    have different nonzero structure.
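   The count for each row is obtained by a two-pointer merge of the (sorted) global column lists of X
   and Y, counting every column of the union exactly once.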
2138 */
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2141 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2142 {
2143   PetscInt       i,j,k,nzx,nzy;
2144 
2145   PetscFunctionBegin;
2146   /* Set the number of nonzeros in the new matrix */
2147   for (i=0; i<m; i++) {
2148     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2149     nzx = xi[i+1] - xi[i];
2150     nzy = yi[i+1] - yi[i];
2151     nnz[i] = 0;
2152     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2153       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2154       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2155       nnz[i]++;
2156     }
2157     for (; k<nzy; k++) nnz[i]++;
2158   }
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2165 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2166 {
2167   PetscErrorCode ierr;
2168   PetscInt       m = Y->rmap->N;
2169   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2170   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatAXPY_MPIAIJ"
2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2180 {
2181   PetscErrorCode ierr;
2182   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2183   PetscBLASInt   bnz,one=1;
2184   Mat_SeqAIJ     *x,*y;
2185 
2186   PetscFunctionBegin;
2187   if (str == SAME_NONZERO_PATTERN) {
2188     PetscScalar alpha = a;
2189     x    = (Mat_SeqAIJ*)xx->A->data;
2190     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2191     y    = (Mat_SeqAIJ*)yy->A->data;
2192     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2193     x    = (Mat_SeqAIJ*)xx->B->data;
2194     y    = (Mat_SeqAIJ*)yy->B->data;
2195     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2196     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2197     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2198   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2199     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2200   } else {
2201     Mat      B;
2202     PetscInt *nnz_d,*nnz_o;
2203     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2204     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2205     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2206     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2207     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2208     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2209     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2210     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2211     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2212     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2213     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2214     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2215     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2216     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatConjugate_MPIAIJ"
2225 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2226 {
2227 #if defined(PETSC_USE_COMPLEX)
2228   PetscErrorCode ierr;
2229   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2230 
2231   PetscFunctionBegin;
2232   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2233   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2234 #else
2235   PetscFunctionBegin;
2236 #endif
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatRealPart_MPIAIJ"
2242 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2243 {
2244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2245   PetscErrorCode ierr;
2246 
2247   PetscFunctionBegin;
2248   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2249   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2255 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2256 {
2257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2258   PetscErrorCode ierr;
2259 
2260   PetscFunctionBegin;
2261   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2262   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2268 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2269 {
2270   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2271   PetscErrorCode ierr;
2272   PetscInt       i,*idxb = 0;
2273   PetscScalar    *va,*vb;
2274   Vec            vtmp;
2275 
2276   PetscFunctionBegin;
2277   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2278   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2279   if (idx) {
2280     for (i=0; i<A->rmap->n; i++) {
2281       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2282     }
2283   }
2284 
2285   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2286   if (idx) {
2287     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2288   }
2289   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2290   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2291 
2292   for (i=0; i<A->rmap->n; i++) {
2293     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2294       va[i] = vb[i];
2295       if (idx) idx[i] = a->garray[idxb[i]];
2296     }
2297   }
2298 
2299   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2300   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2301   ierr = PetscFree(idxb);CHKERRQ(ierr);
2302   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2308 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2309 {
2310   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2311   PetscErrorCode ierr;
2312   PetscInt       i,*idxb = 0;
2313   PetscScalar    *va,*vb;
2314   Vec            vtmp;
2315 
2316   PetscFunctionBegin;
2317   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2318   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2319   if (idx) {
2320     for (i=0; i<A->rmap->n; i++) {
2321       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2322     }
2323   }
2324 
2325   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2326   if (idx) {
2327     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2328   }
2329   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2330   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2331 
2332   for (i=0; i<A->rmap->n; i++) {
2333     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2334       va[i] = vb[i];
2335       if (idx) idx[i] = a->garray[idxb[i]];
2336     }
2337   }
2338 
2339   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2340   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2341   ierr = PetscFree(idxb);CHKERRQ(ierr);
2342   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2348 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2351   PetscInt       n      = A->rmap->n;
2352   PetscInt       cstart = A->cmap->rstart;
2353   PetscInt       *cmap  = mat->garray;
2354   PetscInt       *diagIdx, *offdiagIdx;
2355   Vec            diagV, offdiagV;
2356   PetscScalar    *a, *diagA, *offdiagA;
2357   PetscInt       r;
2358   PetscErrorCode ierr;
2359 
2360   PetscFunctionBegin;
2361   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2362   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2363   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2364   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2365   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2366   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2367   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2368   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2369   for (r = 0; r < n; ++r) {
2370     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2371       a[r]   = diagA[r];
2372       idx[r] = cstart + diagIdx[r];
2373     } else {
2374       a[r]   = offdiagA[r];
2375       idx[r] = cmap[offdiagIdx[r]];
2376     }
2377   }
2378   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2379   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2381   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2382   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2383   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2389 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2390 {
2391   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2392   PetscInt       n      = A->rmap->n;
2393   PetscInt       cstart = A->cmap->rstart;
2394   PetscInt       *cmap  = mat->garray;
2395   PetscInt       *diagIdx, *offdiagIdx;
2396   Vec            diagV, offdiagV;
2397   PetscScalar    *a, *diagA, *offdiagA;
2398   PetscInt       r;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2403   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2404   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2405   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2406   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2407   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2408   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2409   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2410   for (r = 0; r < n; ++r) {
2411     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2412       a[r]   = diagA[r];
2413       idx[r] = cstart + diagIdx[r];
2414     } else {
2415       a[r]   = offdiagA[r];
2416       idx[r] = cmap[offdiagIdx[r]];
2417     }
2418   }
2419   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2420   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2421   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2422   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2423   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2424   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2430 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2431 {
2432   PetscErrorCode ierr;
2433   Mat            *dummy;
2434 
2435   PetscFunctionBegin;
2436   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2437   *newmat = *dummy;
2438   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2444 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2445 {
2446   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2447   PetscErrorCode ierr;
2448 
2449   PetscFunctionBegin;
2450   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2451   A->factorerrortype = a->A->factorerrortype;
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2457 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2458 {
2459   PetscErrorCode ierr;
2460   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2461 
2462   PetscFunctionBegin;
2463   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2464   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2465   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2466   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2472 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2473 {
2474   PetscFunctionBegin;
2475   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2476   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2482 /*@
2483    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2484 
2485    Collective on Mat
2486 
2487    Input Parameters:
2488 +    A - the matrix
2489 -    sc - PETSC_TRUE to use the scalable algorithm (by default the scalable algorithm is not used)
2490 
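   Options Database Keys:
.  -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap

   Example Usage (a minimal sketch; assumes A is an assembled MATMPIAIJ/MATAIJ matrix):
.vb
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
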
2491  Level: advanced
2492 
2493 @*/
2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2495 {
2496   PetscErrorCode       ierr;
2497 
2498   PetscFunctionBegin;
2499   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2505 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2506 {
2507   PetscErrorCode       ierr;
2508   PetscBool            sc = PETSC_FALSE,flg;
2509 
2510   PetscFunctionBegin;
2511   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2512   ierr = PetscObjectOptionsBegin((PetscObject)A);
2513     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2514     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2515     if (flg) {
2516       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2517     }
2518   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatShift_MPIAIJ"
2524 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2525 {
2526   PetscErrorCode ierr;
2527   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2528   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2529 
2530   PetscFunctionBegin;
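  /* make sure the diagonal block has (at least) one entry per row available before MatShift_Basic()
     inserts the diagonal entries */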
2531   if (!Y->preallocated) {
2532     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2533   } else if (!aij->nz) {
2534     PetscInt nonew = aij->nonew;
2535     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2536     aij->nonew = nonew;
2537   }
2538   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2544 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2545 {
2546   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2547   PetscErrorCode ierr;
2548 
2549   PetscFunctionBegin;
2550   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2551   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2552   if (d) {
2553     PetscInt rstart;
2554     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2555     *d += rstart;
2556 
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 
2562 /* -------------------------------------------------------------------*/
2563 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2564                                        MatGetRow_MPIAIJ,
2565                                        MatRestoreRow_MPIAIJ,
2566                                        MatMult_MPIAIJ,
2567                                 /* 4*/ MatMultAdd_MPIAIJ,
2568                                        MatMultTranspose_MPIAIJ,
2569                                        MatMultTransposeAdd_MPIAIJ,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                 /*10*/ 0,
2574                                        0,
2575                                        0,
2576                                        MatSOR_MPIAIJ,
2577                                        MatTranspose_MPIAIJ,
2578                                 /*15*/ MatGetInfo_MPIAIJ,
2579                                        MatEqual_MPIAIJ,
2580                                        MatGetDiagonal_MPIAIJ,
2581                                        MatDiagonalScale_MPIAIJ,
2582                                        MatNorm_MPIAIJ,
2583                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2584                                        MatAssemblyEnd_MPIAIJ,
2585                                        MatSetOption_MPIAIJ,
2586                                        MatZeroEntries_MPIAIJ,
2587                                 /*24*/ MatZeroRows_MPIAIJ,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*29*/ MatSetUp_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                        MatGetDiagonalBlock_MPIAIJ,
2596                                        0,
2597                                 /*34*/ MatDuplicate_MPIAIJ,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*39*/ MatAXPY_MPIAIJ,
2603                                        MatGetSubMatrices_MPIAIJ,
2604                                        MatIncreaseOverlap_MPIAIJ,
2605                                        MatGetValues_MPIAIJ,
2606                                        MatCopy_MPIAIJ,
2607                                 /*44*/ MatGetRowMax_MPIAIJ,
2608                                        MatScale_MPIAIJ,
2609                                        MatShift_MPIAIJ,
2610                                        MatDiagonalSet_MPIAIJ,
2611                                        MatZeroRowsColumns_MPIAIJ,
2612                                 /*49*/ MatSetRandom_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2618                                        0,
2619                                        MatSetUnfactored_MPIAIJ,
2620                                        MatPermute_MPIAIJ,
2621                                        0,
2622                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2623                                        MatDestroy_MPIAIJ,
2624                                        MatView_MPIAIJ,
2625                                        0,
2626                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2627                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2628                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2633                                        MatGetRowMinAbs_MPIAIJ,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*75*/ MatFDColoringApply_AIJ,
2639                                        MatSetFromOptions_MPIAIJ,
2640                                        0,
2641                                        0,
2642                                        MatFindZeroDiagonals_MPIAIJ,
2643                                 /*80*/ 0,
2644                                        0,
2645                                        0,
2646                                 /*83*/ MatLoad_MPIAIJ,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2653                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2654                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2655                                        MatPtAP_MPIAIJ_MPIAIJ,
2656                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2657                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                 /*99*/ 0,
2663                                        0,
2664                                        0,
2665                                        MatConjugate_MPIAIJ,
2666                                        0,
2667                                 /*104*/MatSetValuesRow_MPIAIJ,
2668                                        MatRealPart_MPIAIJ,
2669                                        MatImaginaryPart_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                 /*109*/0,
2673                                        0,
2674                                        MatGetRowMin_MPIAIJ,
2675                                        0,
2676                                        MatMissingDiagonal_MPIAIJ,
2677                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2678                                        0,
2679                                        MatGetGhosts_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                 /*119*/0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        MatGetMultiProcBlock_MPIAIJ,
2687                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2688                                        MatGetColumnNorms_MPIAIJ,
2689                                        MatInvertBlockDiagonal_MPIAIJ,
2690                                        0,
2691                                        MatGetSubMatricesMPI_MPIAIJ,
2692                                 /*129*/0,
2693                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2694                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2695                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2696                                        0,
2697                                 /*134*/0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                 /*139*/0,
2703                                        0,
2704                                        0,
2705                                        MatFDColoringSetUp_MPIXAIJ,
2706                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2707                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2708 };
2709 
2710 /* ----------------------------------------------------------------------------------------*/
2711 
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2714 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2715 {
2716   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2721   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2727 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2728 {
2729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2730   PetscErrorCode ierr;
2731 
2732   PetscFunctionBegin;
2733   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2734   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2740 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2741 {
2742   Mat_MPIAIJ     *b;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2747   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2748   b = (Mat_MPIAIJ*)B->data;
2749 
2750 #if defined(PETSC_USE_CTABLE)
2751   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2752 #else
2753   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2754 #endif
2755   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2756   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2757   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2758 
2759   /* Because the off-diagonal block B will have been resized, we simply destroy it and create a new one each time */
2760   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2761   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2762   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2763   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2764   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2765   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2766 
2767   if (!B->preallocated) {
2768     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2769     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2770     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2771     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2772     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2773   }
2774 
2775   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2776   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2777   B->preallocated  = PETSC_TRUE;
2778   B->was_assembled = PETSC_FALSE;
2779   B->assembled     = PETSC_FALSE;
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2785 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2786 {
2787   Mat            mat;
2788   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   *newmat = 0;
2793   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2794   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2795   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2796   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2797   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2798   a       = (Mat_MPIAIJ*)mat->data;
2799 
2800   mat->factortype   = matin->factortype;
2801   mat->assembled    = PETSC_TRUE;
2802   mat->insertmode   = NOT_SET_VALUES;
2803   mat->preallocated = PETSC_TRUE;
2804 
2805   a->size         = oldmat->size;
2806   a->rank         = oldmat->rank;
2807   a->donotstash   = oldmat->donotstash;
2808   a->roworiented  = oldmat->roworiented;
2809   a->rowindices   = 0;
2810   a->rowvalues    = 0;
2811   a->getrowactive = PETSC_FALSE;
2812 
2813   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2814   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2815 
2816   if (oldmat->colmap) {
2817 #if defined(PETSC_USE_CTABLE)
2818     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2819 #else
2820     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2821     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2822     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2823 #endif
2824   } else a->colmap = 0;
2825   if (oldmat->garray) {
2826     PetscInt len;
2827     len  = oldmat->B->cmap->n;
2828     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2829     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2830     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2831   } else a->garray = 0;
2832 
2833   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2834   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2835   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2836   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2837   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2838   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2839   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2840   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2841   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2842   *newmat = mat;
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 
2847 
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatLoad_MPIAIJ"
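/* Loads a matrix stored in PETSc binary format: rank 0 reads the header and all row lengths, computes
   each process's nonzero count, and ships each process its row lengths, column indices, and values with
   MPIULong_Send(); the other ranks receive their pieces with MPIULong_Recv(), and every rank inserts its
   rows locally with MatSetValues_MPIAIJ() before the final assembly. */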
2850 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2851 {
2852   PetscScalar    *vals,*svals;
2853   MPI_Comm       comm;
2854   PetscErrorCode ierr;
2855   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2856   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2857   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2858   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2859   PetscInt       cend,cstart,n,*rowners;
2860   int            fd;
2861   PetscInt       bs = newMat->rmap->bs;
2862 
2863   PetscFunctionBegin;
2864   /* force binary viewer to load .info file if it has not yet done so */
2865   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2866   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2867   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2868   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2869   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2870   if (!rank) {
2871     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2872     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2873     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2874   }
2875 
2876   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2877   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2878   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2879   if (bs < 0) bs = 1;
2880 
2881   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2882   M    = header[1]; N = header[2];
2883 
2884   /* If global sizes are set, check if they are consistent with that given in the file */
2885   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2886   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2887 
2888   /* determine ownership of all (block) rows */
2889   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2890   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2891   else m = newMat->rmap->n; /* Set by user */
2892 
2893   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2894   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2895 
2896   /* First process needs enough room for process with most rows */
2897   if (!rank) {
2898     mmax = rowners[1];
2899     for (i=2; i<=size; i++) {
2900       mmax = PetscMax(mmax, rowners[i]);
2901     }
2902   } else mmax = -1;             /* unused, but compilers complain */
2903 
2904   rowners[0] = 0;
2905   for (i=2; i<=size; i++) {
2906     rowners[i] += rowners[i-1];
2907   }
2908   rstart = rowners[rank];
2909   rend   = rowners[rank+1];
2910 
2911   /* distribute row lengths to all processors */
2912   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2913   if (!rank) {
2914     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2915     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2916     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2917     for (j=0; j<m; j++) {
2918       procsnz[0] += ourlens[j];
2919     }
2920     for (i=1; i<size; i++) {
2921       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2922       /* calculate the number of nonzeros on each processor */
2923       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2924         procsnz[i] += rowlengths[j];
2925       }
2926       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2927     }
2928     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2929   } else {
2930     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2931   }
2932 
2933   if (!rank) {
2934     /* determine max buffer needed and allocate it */
2935     maxnz = 0;
2936     for (i=0; i<size; i++) {
2937       maxnz = PetscMax(maxnz,procsnz[i]);
2938     }
2939     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2940 
2941     /* read in my part of the matrix column indices  */
2942     nz   = procsnz[0];
2943     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2944     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2945 
2946     /* read in everyone else's part and ship it off */
2947     for (i=1; i<size; i++) {
2948       nz   = procsnz[i];
2949       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2950       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2951     }
2952     ierr = PetscFree(cols);CHKERRQ(ierr);
2953   } else {
2954     /* determine buffer space needed for message */
2955     nz = 0;
2956     for (i=0; i<m; i++) {
2957       nz += ourlens[i];
2958     }
2959     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2960 
2961     /* receive message of column indices*/
2962     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2963   }
2964 
2965   /* determine column ownership if matrix is not square */
2966   if (N != M) {
2967     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2968     else n = newMat->cmap->n;
2969     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2970     cstart = cend - n;
2971   } else {
2972     cstart = rstart;
2973     cend   = rend;
2974     n      = cend - cstart;
2975   }
2976 
2977   /* loop over local rows, determining number of off diagonal entries */
2978   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2979   jj   = 0;
2980   for (i=0; i<m; i++) {
2981     for (j=0; j<ourlens[i]; j++) {
2982       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2983       jj++;
2984     }
2985   }
2986 
2987   for (i=0; i<m; i++) {
2988     ourlens[i] -= offlens[i];
2989   }
2990   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2991 
2992   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2993 
2994   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2995 
2996   for (i=0; i<m; i++) {
2997     ourlens[i] += offlens[i];
2998   }
2999 
3000   if (!rank) {
3001     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix numerical values  */
3004     nz   = procsnz[0];
3005     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3006 
3007     /* insert into matrix */
3008     jj      = rstart;
3009     smycols = mycols;
3010     svals   = vals;
3011     for (i=0; i<m; i++) {
3012       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3013       smycols += ourlens[i];
3014       svals   += ourlens[i];
3015       jj++;
3016     }
3017 
3018     /* read in other processors and ship out */
3019     for (i=1; i<size; i++) {
3020       nz   = procsnz[i];
3021       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3022       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3023     }
3024     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3025   } else {
3026     /* receive numeric values */
3027     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3028 
3029     /* receive message of values*/
3030     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3031 
3032     /* insert into matrix */
3033     jj      = rstart;
3034     smycols = mycols;
3035     svals   = vals;
3036     for (i=0; i<m; i++) {
3037       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3038       smycols += ourlens[i];
3039       svals   += ourlens[i];
3040       jj++;
3041     }
3042   }
3043   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3044   ierr = PetscFree(vals);CHKERRQ(ierr);
3045   ierr = PetscFree(mycols);CHKERRQ(ierr);
3046   ierr = PetscFree(rowners);CHKERRQ(ierr);
3047   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3048   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 #undef __FUNCT__
3053 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3054 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3055 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3056 {
3057   PetscErrorCode ierr;
3058   IS             iscol_local;
3059   PetscInt       csize;
3060 
3061   PetscFunctionBegin;
3062   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3063   if (call == MAT_REUSE_MATRIX) {
3064     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3065     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3066   } else {
3067     /* check if we are grabbing all columns*/
3068     PetscBool    isstride;
3069     PetscMPIInt  lisstride = 0,gisstride;
3070     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3071     if (isstride) {
3072       PetscInt  start,len,mstart,mlen;
3073       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3074       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3075       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3076       if (mstart == start && mlen-mstart == len) lisstride = 1;
3077     }
3078     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3079     if (gisstride) {
3080       PetscInt N;
3081       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3082       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3083       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3084       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3085     } else {
3086       PetscInt cbs;
3087       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3088       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3089       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3090     }
3091   }
3092   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3093   if (call == MAT_INITIAL_MATRIX) {
3094     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3095     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3096   }
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3103 /*
3104     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3105   matrix and then the end result obtained by concatenating the local matrices.
3106   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3107 
3108   Note: This requires a sequential iscol with all indices.
3109 */
3110 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3111 {
3112   PetscErrorCode ierr;
3113   PetscMPIInt    rank,size;
3114   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3115   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3116   PetscBool      allcolumns, colflag;
3117   Mat            M,Mreuse;
3118   MatScalar      *vwork,*aa;
3119   MPI_Comm       comm;
3120   Mat_SeqAIJ     *aij;
3121 
3122   PetscFunctionBegin;
3123   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3125   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3126 
3127   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3128   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3129   if (colflag && ncol == mat->cmap->N) {
3130     allcolumns = PETSC_TRUE;
3131     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3132   } else {
3133     allcolumns = PETSC_FALSE;
3134   }
3135   if (call ==  MAT_REUSE_MATRIX) {
3136     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3137     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3138     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3139   } else {
3140     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3141   }
3142 
3143   /*
3144       m - number of local rows
3145       n - number of columns (same on all processors)
3146       rstart - first row in new global matrix generated
3147   */
3148   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3149   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3150   if (call == MAT_INITIAL_MATRIX) {
3151     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3152     ii  = aij->i;
3153     jj  = aij->j;
3154 
3155     /*
3156         Determine the number of non-zeros in the diagonal and off-diagonal
3157         portions of the matrix in order to do correct preallocation
3158     */
3159 
3160     /* first get start and end of "diagonal" columns */
3161     if (csize == PETSC_DECIDE) {
3162       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3163       if (mglobal == n) { /* square matrix */
3164         nlocal = m;
3165       } else {
3166         nlocal = n/size + ((n % size) > rank);
3167       }
3168     } else {
3169       nlocal = csize;
3170     }
3171     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3172     rstart = rend - nlocal;
3173     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3174 
3175     /* next, compute all the lengths */
3176     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3177     olens = dlens + m;
3178     for (i=0; i<m; i++) {
3179       jend = ii[i+1] - ii[i];
3180       olen = 0;
3181       dlen = 0;
3182       for (j=0; j<jend; j++) {
3183         if (*jj < rstart || *jj >= rend) olen++;
3184         else dlen++;
3185         jj++;
3186       }
3187       olens[i] = olen;
3188       dlens[i] = dlen;
3189     }
3190     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3191     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3192     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3193     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3194     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3195     ierr = PetscFree(dlens);CHKERRQ(ierr);
3196   } else {
3197     PetscInt ml,nl;
3198 
3199     M    = *newmat;
3200     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3201     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3202     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3203     /*
3204          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3205        rather than the slower MatSetValues().
3206     */
3207     M->was_assembled = PETSC_TRUE;
3208     M->assembled     = PETSC_FALSE;
3209   }
3210   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3211   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3212   ii   = aij->i;
3213   jj   = aij->j;
3214   aa   = aij->a;
3215   for (i=0; i<m; i++) {
3216     row   = rstart + i;
3217     nz    = ii[i+1] - ii[i];
3218     cwork = jj;     jj += nz;
3219     vwork = aa;     aa += nz;
3220     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3221   }
3222 
3223   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3224   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225   *newmat = M;
3226 
3227   /* save submatrix used in processor for next request */
3228   if (call ==  MAT_INITIAL_MATRIX) {
3229     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3230     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3231   }
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 #undef __FUNCT__
3236 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3237 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3238 {
3239   PetscInt       m,cstart, cend,j,nnz,i,d;
3240   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3241   const PetscInt *JJ;
3242   PetscScalar    *values;
3243   PetscErrorCode ierr;
3244 
3245   PetscFunctionBegin;
3246   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3247 
3248   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3249   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3250   m      = B->rmap->n;
3251   cstart = B->cmap->rstart;
3252   cend   = B->cmap->rend;
3253   rstart = B->rmap->rstart;
3254 
3255   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3256 
3257 #if defined(PETSC_USE_DEBUG)
3258   for (i=0; i<m; i++) {
3259     nnz = Ii[i+1]- Ii[i];
3260     JJ  = J + Ii[i];
3261     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3262     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3263     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3264   }
3265 #endif
3266 
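  /* count, for each local row, how many column indices fall inside the diagonal block [cstart,cend)
     so that d_nnz/o_nnz give an exact preallocation */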
3267   for (i=0; i<m; i++) {
3268     nnz     = Ii[i+1]- Ii[i];
3269     JJ      = J + Ii[i];
3270     nnz_max = PetscMax(nnz_max,nnz);
3271     d       = 0;
3272     for (j=0; j<nnz; j++) {
3273       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3274     }
3275     d_nnz[i] = d;
3276     o_nnz[i] = nnz - d;
3277   }
3278   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3279   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3280 
3281   if (v) values = (PetscScalar*)v;
3282   else {
3283     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3284   }
3285 
3286   for (i=0; i<m; i++) {
3287     ii   = i + rstart;
3288     nnz  = Ii[i+1]- Ii[i];
3289     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3290   }
3291   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293 
3294   if (!v) {
3295     ierr = PetscFree(values);CHKERRQ(ierr);
3296   }
3297   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3298   PetscFunctionReturn(0);
3299 }
3300 
3301 #undef __FUNCT__
3302 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3303 /*@
3304    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3305    (the default parallel PETSc format).
3306 
3307    Collective on MPI_Comm
3308 
3309    Input Parameters:
3310 +  B - the matrix
3311 .  i - the indices into j for the start of each local row (starts with zero)
3312 .  j - the column indices for each local row (starts with zero)
3313 -  v - optional values in the matrix
3314 
3315    Level: developer
3316 
3317    Notes:
3318        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3319      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3320      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3321 
3322        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3323 
3324        The format used for the sparse matrix input is equivalent to a
3325     row-major ordering, i.e. for the following matrix, the input data expected is
3326     as shown:
3327 
3328 $        1 0 0
3329 $        2 0 3     P0
3330 $       -------
3331 $        4 5 6     P1
3332 $
3333 $     Process0 [P0]: rows_owned=[0,1]
3334 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3335 $        j =  {0,0,2}  [size = 3]
3336 $        v =  {1,2,3}  [size = 3]
3337 $
3338 $     Process1 [P1]: rows_owned=[2]
3339 $        i =  {0,3}    [size = nrow+1  = 1+1]
3340 $        j =  {0,1,2}  [size = 3]
3341 $        v =  {4,5,6}  [size = 3]
3342 
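   As a sketch, process 0 in the layout above could pass these arrays as follows (assuming B has
   already been created with MatCreate(), MatSetType(B,MATMPIAIJ), and MatSetSizes(B,2,PETSC_DECIDE,3,3);
   error checking omitted):

.vb
     PetscInt    i[] = {0,1,3};         /* row r uses entries i[r] .. i[r+1]-1 of j[] and v[] */
     PetscInt    j[] = {0,0,2};         /* global column indices, 0-based */
     PetscScalar v[] = {1.0,2.0,3.0};
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve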
3343 .keywords: matrix, aij, compressed row, sparse, parallel
3344 
3345 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3346           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3347 @*/
3348 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3349 {
3350   PetscErrorCode ierr;
3351 
3352   PetscFunctionBegin;
3353   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3359 /*@C
3360    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3361    (the default parallel PETSc format).  For good matrix assembly performance
3362    the user should preallocate the matrix storage by setting the parameters
3363    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3364    performance can be increased by more than a factor of 50.
3365 
3366    Collective on MPI_Comm
3367 
3368    Input Parameters:
3369 +  B - the matrix
3370 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3371            (same value is used for all local rows)
3372 .  d_nnz - array containing the number of nonzeros in the various rows of the
3373            DIAGONAL portion of the local submatrix (possibly different for each row)
3374            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3375            The size of this array is equal to the number of local rows, i.e 'm'.
3376            For matrices that will be factored, you must leave room for (and set)
3377            the diagonal entry even if it is zero.
3378 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3379            submatrix (same value is used for all local rows).
3380 -  o_nnz - array containing the number of nonzeros in the various rows of the
3381            OFF-DIAGONAL portion of the local submatrix (possibly different for
3382            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3383            structure. The size of this array is equal to the number
3384            of local rows, i.e 'm'.
3385 
3386    If the *_nnz parameter is given then the *_nz parameter is ignored
3387 
3388    The AIJ format (also called the Yale sparse matrix format or
3389    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3390    storage.  The stored row and column indices begin with zero.
3391    See Users-Manual: ch_mat for details.
3392 
3393    The parallel matrix is partitioned such that the first m0 rows belong to
3394    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3395    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3396 
3397    The DIAGONAL portion of the local submatrix of a processor can be defined
3398    as the submatrix which is obtained by extracting the part corresponding to
3399    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3400    first row that belongs to the processor, r2 is the last row belonging to
3401    this processor, and c1-c2 is the range of indices of the local part of a
3402    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3403    common case of a square matrix, the row and column ranges are the same and
3404    the DIAGONAL part is also square. The remaining portion of the local
3405    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3406 
3407    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3408 
3409    You can call MatGetInfo() to get information on how effective the preallocation was;
3410    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3411    You can also run with the option -info and look for messages with the string
3412    malloc in them to see if additional memory allocation was needed.
3413 
3414    Example usage:
3415 
3416    Consider the following 8x8 matrix with 34 non-zero values, that is
3417    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3418    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3419    as follows:
3420 
3421 .vb
3422             1  2  0  |  0  3  0  |  0  4
3423     Proc0   0  5  6  |  7  0  0  |  8  0
3424             9  0 10  | 11  0  0  | 12  0
3425     -------------------------------------
3426            13  0 14  | 15 16 17  |  0  0
3427     Proc1   0 18  0  | 19 20 21  |  0  0
3428             0  0  0  | 22 23  0  | 24  0
3429     -------------------------------------
3430     Proc2  25 26 27  |  0  0 28  | 29  0
3431            30  0  0  | 31 32 33  |  0 34
3432 .ve
3433 
3434    This can be represented as a collection of submatrices as:
3435 
3436 .vb
3437       A B C
3438       D E F
3439       G H I
3440 .ve
3441 
3442    Where the submatrices A,B,C are owned by proc0, D,E,F are
3443    owned by proc1, G,H,I are owned by proc2.
3444 
3445    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3446    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3447    The 'M','N' parameters are 8,8, and have the same values on all procs.
3448 
3449    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3450    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3451    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3452    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3453    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
3454    matrix, and [DF] as another SeqAIJ matrix.
3455 
3456    When d_nz, o_nz parameters are specified, d_nz storage elements are
3457    allocated for every row of the local diagonal submatrix, and o_nz
3458    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3459    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3460    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3461    In this case, the values of d_nz,o_nz are:
3462 .vb
3463      proc0 : dnz = 2, o_nz = 2
3464      proc1 : dnz = 3, o_nz = 2
3465      proc2 : dnz = 1, o_nz = 4
3466 .ve
3467    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3468    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3469    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3470    34 values.
3471 
3472    When d_nnz, o_nnz parameters are specified, the storage is specified
3473    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3474    In the above case the values for d_nnz,o_nnz are:
3475 .vb
3476      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3477      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3478      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3479 .ve
3480    Here the space allocated is the sum of all the above values, i.e. 34, and
3481    hence the preallocation is perfect.
3482 
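   As a sketch, process 0 above could preallocate with exact per-row counts (assuming B was created
   with MatCreate(), MatSetType(B,MATMPIAIJ), and MatSetSizes(B,3,3,8,8); error checking omitted):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve

   or, less precisely, with a single value used for every local row:

.vb
     MatMPIAIJSetPreallocation(B,2,NULL,2,NULL);
.ve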
3483    Level: intermediate
3484 
3485 .keywords: matrix, aij, compressed row, sparse, parallel
3486 
3487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3488           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3489 @*/
3490 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3491 {
3492   PetscErrorCode ierr;
3493 
3494   PetscFunctionBegin;
3495   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3496   PetscValidType(B,1);
3497   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3503 /*@
3504      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3505          the local rows in standard CSR format.
3506 
3507    Collective on MPI_Comm
3508 
3509    Input Parameters:
3510 +  comm - MPI communicator
3511 .  m - number of local rows (Cannot be PETSC_DECIDE)
3512 .  n - This value should be the same as the local size used in creating the
3513        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3514        calculated if N is given) For square matrices n is almost always m.
3515 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3516 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3517 .   i - row indices
3518 .   j - column indices
3519 -   a - matrix values
3520 
3521    Output Parameter:
3522 .   mat - the matrix
3523 
3524    Level: intermediate
3525 
3526    Notes:
3527        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3528      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3529      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3530 
3531        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3532 
3533        The format used for the sparse matrix input is equivalent to a
3534     row-major ordering, i.e. for the following matrix, the input data expected is
3535     as shown:
3536 
3537 $        1 0 0
3538 $        2 0 3     P0
3539 $       -------
3540 $        4 5 6     P1
3541 $
3542 $     Process0 [P0]: rows_owned=[0,1]
3543 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3544 $        j =  {0,0,2}  [size = 3]
3545 $        v =  {1,2,3}  [size = 3]
3546 $
3547 $     Process1 [P1]: rows_owned=[2]
3548 $        i =  {0,3}    [size = nrow+1  = 1+1]
3549 $        j =  {0,1,2}  [size = 3]
3550 $        v =  {4,5,6}  [size = 3]
3551 
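   A sketch of the corresponding call on process 0 (the communicator, the output matrix A, and error
   checking are abbreviated; process 1 differs only in m and in its arrays):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve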
3552 .keywords: matrix, aij, compressed row, sparse, parallel
3553 
3554 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3555           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3556 @*/
3557 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3558 {
3559   PetscErrorCode ierr;
3560 
3561   PetscFunctionBegin;
3562   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3563   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3564   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3565   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3566   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3567   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3568   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 #undef __FUNCT__
3573 #define __FUNCT__ "MatCreateAIJ"
3574 /*@C
3575    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3576    (the default parallel PETSc format).  For good matrix assembly performance
3577    the user should preallocate the matrix storage by setting the parameters
3578    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3579    performance can be increased by more than a factor of 50.
3580 
3581    Collective on MPI_Comm
3582 
3583    Input Parameters:
3584 +  comm - MPI communicator
3585 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3586            This value should be the same as the local size used in creating the
3587            y vector for the matrix-vector product y = Ax.
3588 .  n - This value should be the same as the local size used in creating the
3589        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3590        calculated if N is given) For square matrices n is almost always m.
3591 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3592 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3593 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3594            (same value is used for all local rows)
3595 .  d_nnz - array containing the number of nonzeros in the various rows of the
3596            DIAGONAL portion of the local submatrix (possibly different for each row)
3597            or NULL, if d_nz is used to specify the nonzero structure.
3598            The size of this array is equal to the number of local rows, i.e 'm'.
3599 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3600            submatrix (same value is used for all local rows).
3601 -  o_nnz - array containing the number of nonzeros in the various rows of the
3602            OFF-DIAGONAL portion of the local submatrix (possibly different for
3603            each row) or NULL, if o_nz is used to specify the nonzero
3604            structure. The size of this array is equal to the number
3605            of local rows, i.e 'm'.
3606 
3607    Output Parameter:
3608 .  A - the matrix
3609 
3610    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3611    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3612    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3613 
3614    Notes:
3615    If the *_nnz parameter is given then the *_nz parameter is ignored
3616 
3617    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3618    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3619    storage requirements for this matrix.
3620 
3621    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3622    processor then it must be used on all processors that share the object for
3623    that argument.
3624 
3625    The user MUST specify either the local or global matrix dimensions
3626    (possibly both).
3627 
3628    The parallel matrix is partitioned across processors such that the
3629    first m0 rows belong to process 0, the next m1 rows belong to
3630    process 1, the next m2 rows belong to process 2 etc., where
3631    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3632    values corresponding to an [m x N] submatrix.
3633 
3634    The columns are logically partitioned with the n0 columns belonging
3635    to the 0th partition, the next n1 columns belonging to the next
3636    partition etc., where n0,n1,n2... are the input parameter 'n'.
3637 
3638    The DIAGONAL portion of the local submatrix on any given processor
3639    is the submatrix corresponding to the rows and columns m,n
3640    owned by the given processor, i.e. the diagonal matrix on
3641    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3642    etc. The remaining portion of the local submatrix [m x (N-n)]
3643    constitutes the OFF-DIAGONAL portion. The example below better
3644    illustrates this concept.
3645 
3646    For a square global matrix we define each processor's diagonal portion
3647    to be its local rows and the corresponding columns (a square submatrix);
3648    each processor's off-diagonal portion encompasses the remainder of the
3649    local matrix (a rectangular submatrix).
3650 
3651    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3652 
3653    When calling this routine with a single process communicator, a matrix of
3654    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3655    type of communicator, use the construction mechanism:
3656      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3657 
3658    By default, this format uses inodes (identical nodes) when possible.
3659    We search for consecutive rows with the same nonzero structure, thereby
3660    reusing matrix information to achieve increased efficiency.
3661 
3662    Options Database Keys:
3663 +  -mat_no_inode  - Do not use inodes
3664 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3665 -  -mat_aij_oneindex - Internally use indexing starting at 1
3666         rather than 0.  Note that when calling MatSetValues(),
3667         the user still MUST index entries starting at 0!
3668 
3669 
3670    Example usage:
3671 
3672    Consider the following 8x8 matrix with 34 non-zero values, that is
3673    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3674    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3675    as follows:
3676 
3677 .vb
3678             1  2  0  |  0  3  0  |  0  4
3679     Proc0   0  5  6  |  7  0  0  |  8  0
3680             9  0 10  | 11  0  0  | 12  0
3681     -------------------------------------
3682            13  0 14  | 15 16 17  |  0  0
3683     Proc1   0 18  0  | 19 20 21  |  0  0
3684             0  0  0  | 22 23  0  | 24  0
3685     -------------------------------------
3686     Proc2  25 26 27  |  0  0 28  | 29  0
3687            30  0  0  | 31 32 33  |  0 34
3688 .ve
3689 
3690    This can be represented as a collection of submatrices as:
3691 
3692 .vb
3693       A B C
3694       D E F
3695       G H I
3696 .ve
3697 
3698    Where the submatrices A,B,C are owned by proc0, D,E,F are
3699    owned by proc1, G,H,I are owned by proc2.
3700 
3701    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3703    The 'M','N' parameters are 8,8, and have the same values on all procs.
3704 
3705    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3706    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3707    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3708    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3709    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
3710    matrix, and [DF] as another SeqAIJ matrix.
3711 
3712    When d_nz, o_nz parameters are specified, d_nz storage elements are
3713    allocated for every row of the local diagonal submatrix, and o_nz
3714    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3715    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3716    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3717    In this case, the values of d_nz,o_nz are:
3718 .vb
3719      proc0 : dnz = 2, o_nz = 2
3720      proc1 : dnz = 3, o_nz = 2
3721      proc2 : dnz = 1, o_nz = 4
3722 .ve
3723    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3724    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3725    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3726    34 values.
3727 
3728    When d_nnz, o_nnz parameters are specified, the storage is specified
3729    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3730    In the above case the values for d_nnz,o_nnz are:
3731 .vb
3732      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3733      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3734      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3735 .ve
3736    Here the space allocated is the sum of all the above values, i.e. 34, and
3737    hence the preallocation is perfect.
3738 
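   As a sketch, process 0 in the example above could create its share of the matrix with the call
   below (processes 1 and 2 differ only in m and in the nnz arrays; error checking omitted):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve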
3739    Level: intermediate
3740 
3741 .keywords: matrix, aij, compressed row, sparse, parallel
3742 
3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3744           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3745 @*/
3746 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3747 {
3748   PetscErrorCode ierr;
3749   PetscMPIInt    size;
3750 
3751   PetscFunctionBegin;
3752   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3753   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3754   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3755   if (size > 1) {
3756     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3757     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3758   } else {
3759     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3760     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3761   }
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
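/* Gives access to the two sequential blocks of an MPIAIJ matrix: Ad is the diagonal block, Ao the
   off-diagonal block (stored with a compressed local column numbering), and colmap maps Ao's local
   column numbers to global column numbers. */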
3767 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3768 {
3769   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3770   PetscBool      flg;
3771   PetscErrorCode ierr;
3772 
3773   PetscFunctionBegin;
3774   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3775   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3776   if (Ad)     *Ad     = a->A;
3777   if (Ao)     *Ao     = a->B;
3778   if (colmap) *colmap = a->garray;
3779   PetscFunctionReturn(0);
3780 }
3781 
3782 #undef __FUNCT__
3783 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
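/* Stacks the local rows of the sequential matrix inmat from every process in comm, in rank order,
   into a single parallel MPIAIJ matrix *outmat; with MAT_INITIAL_MATRIX the result is created and
   preallocated first, with MAT_REUSE_MATRIX only the numerical values are reinserted. */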
3784 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3785 {
3786   PetscErrorCode ierr;
3787   PetscInt       m,N,i,rstart,nnz,Ii;
3788   PetscInt       *indx;
3789   PetscScalar    *values;
3790 
3791   PetscFunctionBegin;
3792   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3793   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3794     PetscInt       *dnz,*onz,sum,bs,cbs;
3795 
3796     if (n == PETSC_DECIDE) {
3797       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3798     }
3799     /* Check sum(n) = N */
3800     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3801     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3802 
3803     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3804     rstart -= m;
3805 
3806     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3807     for (i=0; i<m; i++) {
3808       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3809       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3810       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3811     }
3812 
3813     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3814     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3815     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3816     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3817     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3818     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3819     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3820   }
3821 
3822   /* numeric phase */
3823   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3824   for (i=0; i<m; i++) {
3825     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3826     Ii   = i + rstart;
3827     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3828     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3829   }
3830   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832   PetscFunctionReturn(0);
3833 }
3834 
3835 #undef __FUNCT__
3836 #define __FUNCT__ "MatFileSplit"
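/*
   MatFileSplit - copies this process's local rows of A into a sequential matrix and appends
   it, in PETSc binary format, to the file "<outfile>.<rank>".
*/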
3837 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3838 {
3839   PetscErrorCode    ierr;
3840   PetscMPIInt       rank;
3841   PetscInt          m,N,i,rstart,nnz;
3842   size_t            len;
3843   const PetscInt    *indx;
3844   PetscViewer       out;
3845   char              *name;
3846   Mat               B;
3847   const PetscScalar *values;
3848 
3849   PetscFunctionBegin;
3850   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3851   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3852   /* Should this be the type of the diagonal block of A? */
3853   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3854   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3855   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3856   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3857   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3858   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3859   for (i=0; i<m; i++) {
3860     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3861     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3862     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3863   }
3864   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866 
3867   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3868   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3869   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix and terminating null */
3870   sprintf(name,"%s.%d",outfile,rank);
3871   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3872   ierr = PetscFree(name);CHKERRQ(ierr);
3873   ierr = MatView(B,out);CHKERRQ(ierr);
3874   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3875   ierr = MatDestroy(&B);CHKERRQ(ierr);
3876   PetscFunctionReturn(0);
3877 }
3878 
3879 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3880 #undef __FUNCT__
3881 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3882 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3883 {
3884   PetscErrorCode      ierr;
3885   Mat_Merge_SeqsToMPI *merge;
3886   PetscContainer      container;
3887 
3888   PetscFunctionBegin;
3889   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3890   if (container) {
3891     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3892     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3893     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3904     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3905     ierr = PetscFree(merge);CHKERRQ(ierr);
3906     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3907   }
3908   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3909   PetscFunctionReturn(0);
3910 }
3911 
3912 #include <../src/mat/utils/freespace.h>
3913 #include <petscbt.h>
3914 
3915 #undef __FUNCT__
3916 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3917 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3918 {
3919   PetscErrorCode      ierr;
3920   MPI_Comm            comm;
3921   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3922   PetscMPIInt         size,rank,taga,*len_s;
3923   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3924   PetscInt            proc,m;
3925   PetscInt            **buf_ri,**buf_rj;
3926   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3927   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3928   MPI_Request         *s_waits,*r_waits;
3929   MPI_Status          *status;
3930   MatScalar           *aa=a->a;
3931   MatScalar           **abuf_r,*ba_i;
3932   Mat_Merge_SeqsToMPI *merge;
3933   PetscContainer      container;
3934 
3935   PetscFunctionBegin;
3936   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3937   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3938 
3939   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3940   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3941 
3942   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3943   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3944 
3945   bi     = merge->bi;
3946   bj     = merge->bj;
3947   buf_ri = merge->buf_ri;
3948   buf_rj = merge->buf_rj;
3949 
3950   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3951   owners = merge->rowmap->range;
3952   len_s  = merge->len_s;
3953 
3954   /* send and recv matrix values */
3955   /*-----------------------------*/
3956   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3957   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3958 
3959   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3960   for (proc=0,k=0; proc<size; proc++) {
3961     if (!len_s[proc]) continue;
3962     i    = owners[proc];
3963     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3964     k++;
3965   }
3966 
3967   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3968   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3969   ierr = PetscFree(status);CHKERRQ(ierr);
3970 
3971   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3972   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3973 
3974   /* insert mat values of mpimat */
3975   /*----------------------------*/
3976   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3977   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3978 
3979   for (k=0; k<merge->nrecv; k++) {
3980     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3981     nrows       = *(buf_ri_k[k]);
3982     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3983     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
3984   }
3985 
3986   /* set values of ba */
3987   m = merge->rowmap->n;
3988   for (i=0; i<m; i++) {
3989     arow = owners[rank] + i;
3990     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3991     bnzi = bi[i+1] - bi[i];
3992     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3993 
3994     /* add local non-zero vals of this proc's seqmat into ba */
3995     anzi   = ai[arow+1] - ai[arow];
3996     aj     = a->j + ai[arow];
3997     aa     = a->a + ai[arow];
3998     nextaj = 0;
3999     for (j=0; nextaj<anzi; j++) {
4000       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4001         ba_i[j] += aa[nextaj++];
4002       }
4003     }
4004 
4005     /* add received vals into ba */
4006     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4007       /* i-th row */
4008       if (i == *nextrow[k]) {
4009         anzi   = *(nextai[k]+1) - *nextai[k];
4010         aj     = buf_rj[k] + *(nextai[k]);
4011         aa     = abuf_r[k] + *(nextai[k]);
4012         nextaj = 0;
4013         for (j=0; nextaj<anzi; j++) {
4014           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4015             ba_i[j] += aa[nextaj++];
4016           }
4017         }
4018         nextrow[k]++; nextai[k]++;
4019       }
4020     }
4021     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4022   }
4023   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4024   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4025 
4026   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4027   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4028   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4029   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4030   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4035 
4036 #undef __FUNCT__
4037 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4038 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4039 {
4040   PetscErrorCode      ierr;
4041   Mat                 B_mpi;
4042   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4043   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4044   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4045   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4046   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4047   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4048   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4049   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4050   MPI_Status          *status;
4051   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4052   PetscBT             lnkbt;
4053   Mat_Merge_SeqsToMPI *merge;
4054   PetscContainer      container;
4055 
4056   PetscFunctionBegin;
4057   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4058 
4059   /* make sure it is a PETSc comm */
4060   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4062   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4063 
4064   ierr = PetscNew(&merge);CHKERRQ(ierr);
4065   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4066 
4067   /* determine row ownership */
4068   /*---------------------------------------------------------*/
4069   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4070   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4071   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4072   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4073   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4074   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4075   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4076 
4077   m      = merge->rowmap->n;
4078   owners = merge->rowmap->range;
4079 
4080   /* determine the number of messages to send, their lengths */
4081   /*---------------------------------------------------------*/
4082   len_s = merge->len_s;
4083 
4084   len          = 0; /* length of buf_si[] */
4085   merge->nsend = 0;
4086   for (proc=0; proc<size; proc++) {
4087     len_si[proc] = 0;
4088     if (proc == rank) {
4089       len_s[proc] = 0;
4090     } else {
4091       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4092       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4093     }
4094     if (len_s[proc]) {
4095       merge->nsend++;
4096       nrows = 0;
4097       for (i=owners[proc]; i<owners[proc+1]; i++) {
4098         if (ai[i+1] > ai[i]) nrows++;
4099       }
4100       len_si[proc] = 2*(nrows+1);
4101       len         += len_si[proc];
4102     }
4103   }
4104 
4105   /* determine the number and length of messages to receive for ij-structure */
4106   /*-------------------------------------------------------------------------*/
4107   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4108   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4109 
4110   /* post the Irecv of j-structure */
4111   /*-------------------------------*/
4112   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4113   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4114 
4115   /* post the Isend of j-structure */
4116   /*--------------------------------*/
4117   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4118 
4119   for (proc=0, k=0; proc<size; proc++) {
4120     if (!len_s[proc]) continue;
4121     i    = owners[proc];
4122     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4123     k++;
4124   }
4125 
4126   /* receives and sends of j-structure are complete */
4127   /*------------------------------------------------*/
4128   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4129   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4130 
4131   /* send and recv i-structure */
4132   /*---------------------------*/
4133   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4134   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4135 
4136   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4137   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4138   for (proc=0,k=0; proc<size; proc++) {
4139     if (!len_s[proc]) continue;
4140     /* form outgoing message for i-structure:
4141          buf_si[0]:                 nrows to be sent
4142                [1:nrows]:           row index (global)
4143                [nrows+1:2*nrows+1]: i-structure index
4144     */
4145     /*-------------------------------------------*/
4146     nrows       = len_si[proc]/2 - 1;
4147     buf_si_i    = buf_si + nrows+1;
4148     buf_si[0]   = nrows;
4149     buf_si_i[0] = 0;
4150     nrows       = 0;
4151     for (i=owners[proc]; i<owners[proc+1]; i++) {
4152       anzi = ai[i+1] - ai[i];
4153       if (anzi) {
4154         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4155         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4156         nrows++;
4157       }
4158     }
4159     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4160     k++;
4161     buf_si += len_si[proc];
4162   }
4163 
4164   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4165   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4166 
4167   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4168   for (i=0; i<merge->nrecv; i++) {
4169     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4170   }
4171 
4172   ierr = PetscFree(len_si);CHKERRQ(ierr);
4173   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4174   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4175   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4176   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4177   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4178   ierr = PetscFree(status);CHKERRQ(ierr);
4179 
4180   /* compute a local seq matrix in each processor */
4181   /*----------------------------------------------*/
4182   /* allocate bi array and free space for accumulating nonzero column info */
4183   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4184   bi[0] = 0;
4185 
4186   /* create and initialize a linked list */
4187   nlnk = N+1;
4188   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4189 
4190   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4191   len  = ai[owners[rank+1]] - ai[owners[rank]];
4192   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4193 
4194   current_space = free_space;
4195 
4196   /* determine symbolic info for each local row */
4197   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4198 
4199   for (k=0; k<merge->nrecv; k++) {
4200     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4201     nrows       = *buf_ri_k[k];
4202     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4203     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4204   }
4205 
4206   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4207   len  = 0;
4208   for (i=0; i<m; i++) {
4209     bnzi = 0;
4210     /* add local non-zero cols of this proc's seqmat into lnk */
4211     arow  = owners[rank] + i;
4212     anzi  = ai[arow+1] - ai[arow];
4213     aj    = a->j + ai[arow];
4214     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4215     bnzi += nlnk;
4216     /* add received col data into lnk */
4217     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4218       if (i == *nextrow[k]) { /* i-th row */
4219         anzi  = *(nextai[k]+1) - *nextai[k];
4220         aj    = buf_rj[k] + *nextai[k];
4221         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4222         bnzi += nlnk;
4223         nextrow[k]++; nextai[k]++;
4224       }
4225     }
4226     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4227 
4228     /* if free space is not available, make more free space */
4229     if (current_space->local_remaining<bnzi) {
4230       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4231       nspacedouble++;
4232     }
4233     /* copy data into free space, then initialize lnk */
4234     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4235     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4236 
4237     current_space->array           += bnzi;
4238     current_space->local_used      += bnzi;
4239     current_space->local_remaining -= bnzi;
4240 
4241     bi[i+1] = bi[i] + bnzi;
4242   }
4243 
4244   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4245 
4246   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4247   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4248   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4249 
4250   /* create symbolic parallel matrix B_mpi */
4251   /*---------------------------------------*/
4252   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4253   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4254   if (n==PETSC_DECIDE) {
4255     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4256   } else {
4257     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4258   }
4259   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4260   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4261   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4262   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4263   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4264 
4265   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4266   B_mpi->assembled    = PETSC_FALSE;
4267   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4268   merge->bi           = bi;
4269   merge->bj           = bj;
4270   merge->buf_ri       = buf_ri;
4271   merge->buf_rj       = buf_rj;
4272   merge->coi          = NULL;
4273   merge->coj          = NULL;
4274   merge->owners_co    = NULL;
4275 
4276   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4277 
4278   /* attach the supporting struct to B_mpi for reuse */
4279   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4280   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4281   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4282   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4283   *mpimat = B_mpi;
4284 
4285   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4286   PetscFunctionReturn(0);
4287 }
4288 
4289 #undef __FUNCT__
4290 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4291 /*@C
4292       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4293                  matrices from each processor
4294 
4295     Collective on MPI_Comm
4296 
4297    Input Parameters:
4298 +    comm - the communicator the parallel matrix will live on
4299 .    seqmat - the input sequential matrix on each process
4300 .    m - number of local rows (or PETSC_DECIDE)
4301 .    n - number of local columns (or PETSC_DECIDE)
4302 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4303 
4304    Output Parameter:
4305 .    mpimat - the parallel matrix generated
4306 
4307     Level: advanced
4308 
4309    Notes:
4310      The dimensions of the sequential matrix on each process MUST be the same.
4311      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4312      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
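
     A minimal usage sketch (names are illustrative; each process assembles its own seqmat):
.vb
       Mat seqmat,mpimat;
       /* ... each process builds a MATSEQAIJ seqmat with identical global dimensions ... */
       MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
       /* if the nonzero pattern of seqmat is unchanged the call may be repeated with MAT_REUSE_MATRIX */
.ve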
4313 @*/
4314 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4315 {
4316   PetscErrorCode ierr;
4317   PetscMPIInt    size;
4318 
4319   PetscFunctionBegin;
4320   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4321   if (size == 1) {
4322     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4323     if (scall == MAT_INITIAL_MATRIX) {
4324       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4325     } else {
4326       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4327     }
4328     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4329     PetscFunctionReturn(0);
4330   }
4331   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4332   if (scall == MAT_INITIAL_MATRIX) {
4333     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4334   }
4335   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4336   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4337   PetscFunctionReturn(0);
4338 }
4339 
4340 #undef __FUNCT__
4341 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4342 /*@
4343      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4344           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4345           with MatGetSize()
4346 
4347     Not Collective
4348 
4349    Input Parameters:
4350 +    A - the matrix
4351 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4352 
4353    Output Parameter:
4354 .    A_loc - the local sequential matrix generated
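
   A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc, then destroy it when it is no longer needed ... */
     MatDestroy(&A_loc);
.ve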
4355 
4356     Level: developer
4357 
4358 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4359 
4360 @*/
4361 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4362 {
4363   PetscErrorCode ierr;
4364   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4365   Mat_SeqAIJ     *mat,*a,*b;
4366   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4367   MatScalar      *aa,*ba,*cam;
4368   PetscScalar    *ca;
4369   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4370   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4371   PetscBool      match;
4372   MPI_Comm       comm;
4373   PetscMPIInt    size;
4374 
4375   PetscFunctionBegin;
4376   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4377   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4378   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4379   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4380   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4381 
4382   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4383   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4384   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4385   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4386   aa = a->a; ba = b->a;
4387   if (scall == MAT_INITIAL_MATRIX) {
4388     if (size == 1) {
4389       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4390       PetscFunctionReturn(0);
4391     }
4392 
4393     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4394     ci[0] = 0;
4395     for (i=0; i<am; i++) {
4396       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4397     }
4398     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4399     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4400     k    = 0;
4401     for (i=0; i<am; i++) {
4402       ncols_o = bi[i+1] - bi[i];
4403       ncols_d = ai[i+1] - ai[i];
4404       /* off-diagonal portion of A */
4405       for (jo=0; jo<ncols_o; jo++) {
4406         col = cmap[*bj];
4407         if (col >= cstart) break;
4408         cj[k]   = col; bj++;
4409         ca[k++] = *ba++;
4410       }
4411       /* diagonal portion of A */
4412       for (j=0; j<ncols_d; j++) {
4413         cj[k]   = cstart + *aj++;
4414         ca[k++] = *aa++;
4415       }
4416       /* off-diagonal portion of A */
4417       for (j=jo; j<ncols_o; j++) {
4418         cj[k]   = cmap[*bj++];
4419         ca[k++] = *ba++;
4420       }
4421     }
4422     /* put together the new matrix */
4423     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4424     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4425     /* Since these are PETSc arrays, change flags to free them as necessary. */
4426     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4427     mat->free_a  = PETSC_TRUE;
4428     mat->free_ij = PETSC_TRUE;
4429     mat->nonew   = 0;
4430   } else if (scall == MAT_REUSE_MATRIX) {
4431     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4432     ci = mat->i; cj = mat->j; cam = mat->a;
4433     for (i=0; i<am; i++) {
4434       /* off-diagonal portion of A */
4435       ncols_o = bi[i+1] - bi[i];
4436       for (jo=0; jo<ncols_o; jo++) {
4437         col = cmap[*bj];
4438         if (col >= cstart) break;
4439         *cam++ = *ba++; bj++;
4440       }
4441       /* diagonal portion of A */
4442       ncols_d = ai[i+1] - ai[i];
4443       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4444       /* off-diagonal portion of A */
4445       for (j=jo; j<ncols_o; j++) {
4446         *cam++ = *ba++; bj++;
4447       }
4448     }
4449   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4450   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 #undef __FUNCT__
4455 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4456 /*@C
4457      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4458 
4459     Not Collective
4460 
4461    Input Parameters:
4462 +    A - the matrix
4463 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4464 -    row, col - index sets of rows and columns to extract (or NULL)
4465 
4466    Output Parameter:
4467 .    A_loc - the local sequential matrix generated
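
   A minimal usage sketch (passing NULL for row and col selects all local rows and the
   locally nonzero columns):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve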
4468 
4469     Level: developer
4470 
4471 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4472 
4473 @*/
4474 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4475 {
4476   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4477   PetscErrorCode ierr;
4478   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4479   IS             isrowa,iscola;
4480   Mat            *aloc;
4481   PetscBool      match;
4482 
4483   PetscFunctionBegin;
4484   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4485   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4486   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4487   if (!row) {
4488     start = A->rmap->rstart; end = A->rmap->rend;
4489     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4490   } else {
4491     isrowa = *row;
4492   }
4493   if (!col) {
4494     start = A->cmap->rstart;
4495     cmap  = a->garray;
4496     nzA   = a->A->cmap->n;
4497     nzB   = a->B->cmap->n;
4498     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4499     ncols = 0;
4500     for (i=0; i<nzB; i++) {
4501       if (cmap[i] < start) idx[ncols++] = cmap[i];
4502       else break;
4503     }
4504     imark = i;
4505     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4506     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4507     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4508   } else {
4509     iscola = *col;
4510   }
4511   if (scall != MAT_INITIAL_MATRIX) {
4512     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4513     aloc[0] = *A_loc;
4514   }
4515   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4516   *A_loc = aloc[0];
4517   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4518   if (!row) {
4519     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4520   }
4521   if (!col) {
4522     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4523   }
4524   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4525   PetscFunctionReturn(0);
4526 }
4527 
4528 #undef __FUNCT__
4529 #define __FUNCT__ "MatGetBrowsOfAcols"
4530 /*@C
4531     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4532 
4533     Collective on Mat
4534 
4535    Input Parameters:
4536 +    A,B - the matrices in mpiaij format
4537 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4538 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4539 
4540    Output Parameters:
4541 +    rowb, colb - index sets of rows and columns of B to extract
4542 -    B_seq - the sequential matrix generated
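
   A minimal usage sketch (the index sets and B_seq created with MAT_INITIAL_MATRIX are
   passed back in for MAT_REUSE_MATRIX):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... after the numerical values of B change (same nonzero pattern) ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve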
4543 
4544     Level: developer
4545 
4546 @*/
4547 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4548 {
4549   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4550   PetscErrorCode ierr;
4551   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4552   IS             isrowb,iscolb;
4553   Mat            *bseq=NULL;
4554 
4555   PetscFunctionBegin;
4556   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4557     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4558   }
4559   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4560 
4561   if (scall == MAT_INITIAL_MATRIX) {
4562     start = A->cmap->rstart;
4563     cmap  = a->garray;
4564     nzA   = a->A->cmap->n;
4565     nzB   = a->B->cmap->n;
4566     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4567     ncols = 0;
4568     for (i=0; i<nzB; i++) {  /* row < local row index */
4569       if (cmap[i] < start) idx[ncols++] = cmap[i];
4570       else break;
4571     }
4572     imark = i;
4573     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4574     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4575     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4576     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4577   } else {
4578     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4579     isrowb  = *rowb; iscolb = *colb;
4580     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4581     bseq[0] = *B_seq;
4582   }
4583   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4584   *B_seq = bseq[0];
4585   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4586   if (!rowb) {
4587     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4588   } else {
4589     *rowb = isrowb;
4590   }
4591   if (!colb) {
4592     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4593   } else {
4594     *colb = iscolb;
4595   }
4596   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4597   PetscFunctionReturn(0);
4598 }
4599 
4600 #undef __FUNCT__
4601 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4602 /*
4603     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4604     of the OFF-DIAGONAL portion of local A
4605 
4606     Collective on Mat
4607 
4608    Input Parameters:
4609 +    A,B - the matrices in mpiaij format
4610 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4611 
4612    Output Parameters:
4613 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4614 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4615 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4616 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4617 
4618     Level: developer
4619 
4620 */
4621 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4622 {
4623   VecScatter_MPI_General *gen_to,*gen_from;
4624   PetscErrorCode         ierr;
4625   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4626   Mat_SeqAIJ             *b_oth;
4627   VecScatter             ctx =a->Mvctx;
4628   MPI_Comm               comm;
4629   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4630   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4631   PetscScalar            *rvalues,*svalues;
4632   MatScalar              *b_otha,*bufa,*bufA;
4633   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4634   MPI_Request            *rwaits = NULL,*swaits = NULL;
4635   MPI_Status             *sstatus,rstatus;
4636   PetscMPIInt            jj,size;
4637   PetscInt               *cols,sbs,rbs;
4638   PetscScalar            *vals;
4639 
4640   PetscFunctionBegin;
4641   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4642   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4643 
4644   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4645     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4646   }
4647   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4648   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4649 
4650   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4651   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4652   rvalues  = gen_from->values; /* holds the length of receiving row */
4653   svalues  = gen_to->values;   /* holds the length of sending row */
4654   nrecvs   = gen_from->n;
4655   nsends   = gen_to->n;
4656 
4657   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4658   srow    = gen_to->indices;    /* local row index to be sent */
4659   sstarts = gen_to->starts;
4660   sprocs  = gen_to->procs;
4661   sstatus = gen_to->sstatus;
4662   sbs     = gen_to->bs;
4663   rstarts = gen_from->starts;
4664   rprocs  = gen_from->procs;
4665   rbs     = gen_from->bs;
4666 
4667   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4668   if (scall == MAT_INITIAL_MATRIX) {
4669     /* i-array */
4670     /*---------*/
4671     /*  post receives */
4672     for (i=0; i<nrecvs; i++) {
4673       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4674       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4675       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4676     }
4677 
4678     /* pack the outgoing message */
4679     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4680 
4681     sstartsj[0] = 0;
4682     rstartsj[0] = 0;
4683     len         = 0; /* total length of j or a array to be sent */
4684     k           = 0;
4685     for (i=0; i<nsends; i++) {
4686       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4687       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4688       for (j=0; j<nrows; j++) {
4689         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4690         for (l=0; l<sbs; l++) {
4691           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4692 
4693           rowlen[j*sbs+l] = ncols;
4694 
4695           len += ncols;
4696           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4697         }
4698         k++;
4699       }
4700       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4701 
4702       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4703     }
4704     /* recvs and sends of i-array are completed */
4705     i = nrecvs;
4706     while (i--) {
4707       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4708     }
4709     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4710 
4711     /* allocate buffers for sending j and a arrays */
4712     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4713     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4714 
4715     /* create i-array of B_oth */
4716     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4717 
4718     b_othi[0] = 0;
4719     len       = 0; /* total length of j or a array to be received */
4720     k         = 0;
4721     for (i=0; i<nrecvs; i++) {
4722       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4723       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4724       for (j=0; j<nrows; j++) {
4725         b_othi[k+1] = b_othi[k] + rowlen[j];
4726         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4727         k++;
4728       }
4729       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4730     }
4731 
4732     /* allocate space for the j and a arrays of B_oth */
4733     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4734     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4735 
4736     /* j-array */
4737     /*---------*/
4738     /*  post receives of j-array */
4739     for (i=0; i<nrecvs; i++) {
4740       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4741       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4742     }
4743 
4744     /* pack the outgoing message j-array */
4745     k = 0;
4746     for (i=0; i<nsends; i++) {
4747       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4748       bufJ  = bufj+sstartsj[i];
4749       for (j=0; j<nrows; j++) {
4750         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4751         for (ll=0; ll<sbs; ll++) {
4752           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4753           for (l=0; l<ncols; l++) {
4754             *bufJ++ = cols[l];
4755           }
4756           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4757         }
4758       }
4759       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4760     }
4761 
4762     /* recvs and sends of j-array are completed */
4763     i = nrecvs;
4764     while (i--) {
4765       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4766     }
4767     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4768   } else if (scall == MAT_REUSE_MATRIX) {
4769     sstartsj = *startsj_s;
4770     rstartsj = *startsj_r;
4771     bufa     = *bufa_ptr;
4772     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4773     b_otha   = b_oth->a;
4774   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value, must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4775 
4776   /* a-array */
4777   /*---------*/
4778   /*  post receives of a-array */
4779   for (i=0; i<nrecvs; i++) {
4780     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4781     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4782   }
4783 
4784   /* pack the outgoing message a-array */
4785   k = 0;
4786   for (i=0; i<nsends; i++) {
4787     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4788     bufA  = bufa+sstartsj[i];
4789     for (j=0; j<nrows; j++) {
4790       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4791       for (ll=0; ll<sbs; ll++) {
4792         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4793         for (l=0; l<ncols; l++) {
4794           *bufA++ = vals[l];
4795         }
4796         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4797       }
4798     }
4799     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4800   }
4801   /* recvs and sends of a-array are completed */
4802   i = nrecvs;
4803   while (i--) {
4804     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4805   }
4806   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4807   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4808 
4809   if (scall == MAT_INITIAL_MATRIX) {
4810     /* put together the new matrix */
4811     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4812 
4813     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4814     /* Since these are PETSc arrays, change flags to free them as necessary. */
4815     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4816     b_oth->free_a  = PETSC_TRUE;
4817     b_oth->free_ij = PETSC_TRUE;
4818     b_oth->nonew   = 0;
4819 
4820     ierr = PetscFree(bufj);CHKERRQ(ierr);
4821     if (!startsj_s || !bufa_ptr) {
4822       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4823       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4824     } else {
4825       *startsj_s = sstartsj;
4826       *startsj_r = rstartsj;
4827       *bufa_ptr  = bufa;
4828     }
4829   }
4830   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4831   PetscFunctionReturn(0);
4832 }
4833 
4834 #undef __FUNCT__
4835 #define __FUNCT__ "MatGetCommunicationStructs"
4836 /*@C
4837   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4838 
4839   Not Collective
4840 
4841   Input Parameters:
4842 . A - The matrix in mpiaij format
4843 
4844   Output Parameters:
4845 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4846 . colmap - A map from global column index to local index into lvec
4847 - multScatter - A scatter from the argument of a matrix-vector product to lvec
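
  A minimal usage sketch (the type of colmap depends on whether PETSc was configured with ctable support):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve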
4848 
4849   Level: developer
4850 
4851 @*/
4852 #if defined(PETSC_USE_CTABLE)
4853 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4854 #else
4855 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4856 #endif
4857 {
4858   Mat_MPIAIJ *a;
4859 
4860   PetscFunctionBegin;
4861   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4862   PetscValidPointer(lvec, 2);
4863   PetscValidPointer(colmap, 3);
4864   PetscValidPointer(multScatter, 4);
4865   a = (Mat_MPIAIJ*) A->data;
4866   if (lvec) *lvec = a->lvec;
4867   if (colmap) *colmap = a->colmap;
4868   if (multScatter) *multScatter = a->Mvctx;
4869   PetscFunctionReturn(0);
4870 }
4871 
4872 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4874 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4875 #if defined(PETSC_HAVE_ELEMENTAL)
4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4877 #endif
4878 #if defined(PETSC_HAVE_HYPRE)
4879 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4880 #endif
4881 
4882 #undef __FUNCT__
4883 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4884 /*
4885     Computes (B'*A')' since computing B*A directly is untenable
4886 
4887                n                       p                          p
4888         (              )       (              )         (                  )
4889       m (      A       )  *  n (       B      )   =   m (         C        )
4890         (              )       (              )         (                  )
4891 
4892 */
4893 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4894 {
4895   PetscErrorCode ierr;
4896   Mat            At,Bt,Ct;
4897 
4898   PetscFunctionBegin;
4899   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4900   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4901   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4902   ierr = MatDestroy(&At);CHKERRQ(ierr);
4903   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4904   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4905   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4906   PetscFunctionReturn(0);
4907 }
4908 
4909 #undef __FUNCT__
4910 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4911 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4912 {
4913   PetscErrorCode ierr;
4914   PetscInt       m=A->rmap->n,n=B->cmap->n;
4915   Mat            Cmat;
4916 
4917   PetscFunctionBegin;
4918   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4919   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4920   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4921   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4922   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4923   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4924   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4925   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4926 
4927   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4928 
4929   *C = Cmat;
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 /* ----------------------------------------------------------------*/
4934 #undef __FUNCT__
4935 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4936 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4937 {
4938   PetscErrorCode ierr;
4939 
4940   PetscFunctionBegin;
4941   if (scall == MAT_INITIAL_MATRIX) {
4942     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4943     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4944     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4945   }
4946   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4947   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4948   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 /*MC
4953    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4954 
4955    Options Database Keys:
4956 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
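
   Example of selecting this matrix type from the options database (a sketch; the
   preallocation values are illustrative):
.vb
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetFromOptions(A);                        /* picks up -mat_type mpiaij */
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve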
4957 
4958   Level: beginner
4959 
4960 .seealso: MatCreateAIJ()
4961 M*/
4962 
4963 #undef __FUNCT__
4964 #define __FUNCT__ "MatCreate_MPIAIJ"
4965 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4966 {
4967   Mat_MPIAIJ     *b;
4968   PetscErrorCode ierr;
4969   PetscMPIInt    size;
4970 
4971   PetscFunctionBegin;
4972   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4973 
4974   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4975   B->data       = (void*)b;
4976   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4977   B->assembled  = PETSC_FALSE;
4978   B->insertmode = NOT_SET_VALUES;
4979   b->size       = size;
4980 
4981   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4982 
4983   /* build cache for off-processor entries formed during MatSetValues() */
4984   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4985 
4986   b->donotstash  = PETSC_FALSE;
4987   b->colmap      = 0;
4988   b->garray      = 0;
4989   b->roworiented = PETSC_TRUE;
4990 
4991   /* stuff used for matrix vector multiply */
4992   b->lvec  = NULL;
4993   b->Mvctx = NULL;
4994 
4995   /* stuff for MatGetRow() */
4996   b->rowindices   = 0;
4997   b->rowvalues    = 0;
4998   b->getrowactive = PETSC_FALSE;
4999 
5000   /* flexible pointer used in CUSP/CUSPARSE classes */
5001   b->spptr = NULL;
5002 
5003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5013 #if defined(PETSC_HAVE_ELEMENTAL)
5014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5015 #endif
5016 #if defined(PETSC_HAVE_HYPRE)
5017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5018 #endif
5019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5022   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 #undef __FUNCT__
5027 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5028 /*@C
5029      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5030          and "off-diagonal" parts of the matrix in CSR format.
5031 
5032    Collective on MPI_Comm
5033 
5034    Input Parameters:
5035 +  comm - MPI communicator
5036 .  m - number of local rows (Cannot be PETSC_DECIDE)
5037 .  n - This value should be the same as the local size used in creating the
5038        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5039        calculated if N is given). For square matrices n is almost always m.
5040 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5041 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5042 .   i - row indices for "diagonal" portion of matrix
5043 .   j - column indices
5044 .   a - matrix values
5045 .   oi - row indices for "off-diagonal" portion of matrix
5046 .   oj - column indices
5047 -   oa - matrix values
5048 
5049    Output Parameter:
5050 .   mat - the matrix
5051 
5052    Level: advanced
5053 
5054    Notes:
5055        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5056        must free the arrays once the matrix has been destroyed and not before.
5057 
5058        The i and j indices are 0 based
5059 
5060        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5061 
5062        This sets local rows and cannot be used to set off-processor values.
5063 
5064        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5065        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5066        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5067        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5068        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5069        communication if it is known that only local entries will be set.
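
       A sketch of the recommended alternative, assembling with MatSetValues() (the loop
       bounds and the per-row data ncols, cols, vals are illustrative):
.vb
     MatCreateAIJ(comm,m,n,M,N,0,d_nnz,0,o_nnz,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       /* obtain ncols, cols[], vals[] for this row from the application's data structures */
       MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve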
5070 
5071 .keywords: matrix, aij, compressed row, sparse, parallel
5072 
5073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5074           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5075 @*/
5076 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5077 {
5078   PetscErrorCode ierr;
5079   Mat_MPIAIJ     *maij;
5080 
5081   PetscFunctionBegin;
5082   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5083   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5084   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5085   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5086   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5087   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5088   maij = (Mat_MPIAIJ*) (*mat)->data;
5089 
5090   (*mat)->preallocated = PETSC_TRUE;
5091 
5092   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5093   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5094 
5095   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5096   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5097 
5098   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5099   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5100   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5101   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5102 
5103   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5104   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5105   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5106   PetscFunctionReturn(0);
5107 }
5108 
5109 /*
5110     Special version of MatSetValues() for MPIAIJ matrices, for direct calls from Fortran
5111 */
5112 #include <petsc/private/fortranimpl.h>
5113 
5114 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5115 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5116 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5117 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5118 #endif
5119 
5120 /* Redefine these error-handling macros so they can be used in a void function (which cannot return a PetscErrorCode) */
5121 #undef CHKERRQ
5122 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5123 #undef SETERRQ2
5124 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5125 #undef SETERRQ3
5126 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5127 #undef SETERRQ
5128 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5129 
5130 #undef __FUNCT__
5131 #define __FUNCT__ "matsetvaluesmpiaij_"
5132 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5133 {
5134   Mat            mat  = *mmat;
5135   PetscInt       m    = *mm, n = *mn;
5136   InsertMode     addv = *maddv;
5137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5138   PetscScalar    value;
5139   PetscErrorCode ierr;
5140 
5141   MatCheckPreallocated(mat,1);
5142   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5143 
5144 #if defined(PETSC_USE_DEBUG)
5145   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5146 #endif
5147   {
5148     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5149     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5150     PetscBool roworiented = aij->roworiented;
5151 
5152     /* Variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5153     Mat        A                 = aij->A;
5154     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5155     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5156     MatScalar  *aa               = a->a;
5157     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5158     Mat        B                 = aij->B;
5159     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5160     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5161     MatScalar  *ba               = b->a;
5162 
5163     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5164     PetscInt  nonew = a->nonew;
5165     MatScalar *ap1,*ap2;
5166 
5167     PetscFunctionBegin;
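    /* Loop over the rows being set: locally owned rows are inserted directly into the diagonal (A)
       or off-diagonal (B) blocks; rows owned by other processes are stashed and communicated during
       MatAssemblyBegin()/MatAssemblyEnd() */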
5168     for (i=0; i<m; i++) {
5169       if (im[i] < 0) continue;
5170 #if defined(PETSC_USE_DEBUG)
5171       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5172 #endif
5173       if (im[i] >= rstart && im[i] < rend) {
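        /* Locally owned row: set up the per-row search state (index/value pointers, row lengths, and
           search bounds) used by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */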
5174         row      = im[i] - rstart;
5175         lastcol1 = -1;
5176         rp1      = aj + ai[row];
5177         ap1      = aa + ai[row];
5178         rmax1    = aimax[row];
5179         nrow1    = ailen[row];
5180         low1     = 0;
5181         high1    = nrow1;
5182         lastcol2 = -1;
5183         rp2      = bj + bi[row];
5184         ap2      = ba + bi[row];
5185         rmax2    = bimax[row];
5186         nrow2    = bilen[row];
5187         low2     = 0;
5188         high2    = nrow2;
5189 
5190         for (j=0; j<n; j++) {
5191           if (roworiented) value = v[i*n+j];
5192           else value = v[i+j*m];
5193           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5194           if (in[j] >= cstart && in[j] < cend) {
5195             col = in[j] - cstart;
5196             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5197           } else if (in[j] < 0) continue;
5198 #if defined(PETSC_USE_DEBUG)
5199           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5200 #endif
5201           else {
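            /* The column lies outside this process's diagonal block, so the entry belongs in B.
               If the matrix was assembled before, B uses a compacted local column numbering and the
               global column index must be translated through aij->colmap; a miss (col < 0) with new
               nonzeros allowed triggers MatDisAssemble_MPIAIJ(), after which B again uses global
               column indices and the search state for its row must be rebuilt */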
5202             if (mat->was_assembled) {
5203               if (!aij->colmap) {
5204                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5205               }
5206 #if defined(PETSC_USE_CTABLE)
5207               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5208               col--;
5209 #else
5210               col = aij->colmap[in[j]] - 1;
5211 #endif
5212               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5213                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5214                 col  =  in[j];
5215                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5216                 B     = aij->B;
5217                 b     = (Mat_SeqAIJ*)B->data;
5218                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5219                 rp2   = bj + bi[row];
5220                 ap2   = ba + bi[row];
5221                 rmax2 = bimax[row];
5222                 nrow2 = bilen[row];
5223                 low2  = 0;
5224                 high2 = nrow2;
5225                 bm    = aij->B->rmap->n;
5226                 ba    = b->a;
5227               }
5228             } else col = in[j];
5229             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5230           }
5231         }
5232       } else if (!aij->donotstash) {
5233         if (roworiented) {
5234           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5235         } else {
5236           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5237         }
5238       }
5239     }
5240   }
5241   PetscFunctionReturnVoid();
5242 }
5243 
5244