xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6583bcc11b01b1dbe9e9259b469643df332bd07a)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to using inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
/*
   MatDistribute_MPIAIJ - Scatters the rows of a matrix held on rank 0 of comm into a parallel matrix.

   Input Parameters:
+  comm  - communicator of the result; its rank 0 is the sender of all data
.  gmat  - the matrix to distribute; rank 0 requires it to be of type MATSEQAIJ
.  m     - number of rows (and columns) this process will own
-  reuse - MAT_INITIAL_MATRIX creates *inmat; any other value reuses the existing *inmat and
           only moves the numerical values

   Output Parameter:
.  inmat - the distributed matrix; it has been through MatAssemblyBegin/End on return

   Notes:
   With MAT_INITIAL_MATRIX, ld[i] (the number of entries of local row i whose column lies below
   this process's first owned row) is stored in Mat_MPIAIJ->ld.  The reuse path reads that array
   to split each incoming row into the pieces that belong to the off-diagonal and diagonal blocks.
*/
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 inspects the type of gmat */
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* NOTE(review): gmat is queried on every rank here although it is only type-checked on rank 0;
       the broadcast below then overwrites bses with rank 0's values - confirm gmat is valid everywhere */
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every process's m into rowners[1..size]; the prefix sum below turns the counts into
       row offsets so that rank r owns rows rowners[r] .. rowners[r+1]-1 */
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine number diagonal and off-diagonal counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
      /* ld[i]: entries of row i with column < rstart; olens[i]: entries with column outside [rstart,rend) */
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
      /* rank 0 inserts its own rows straight from gmat's arrays (no copy is made) */
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
      /* the receives below mirror rank 0's send order: row lengths, entry count, column indices, values */
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine number diagonal and off-diagonal counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
    /* dlens temporarily holds diagonal-block counts only; it is restored to full row lengths right after */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
    /* insert the local rows one at a time; cnt is the running offset into the column/value arrays */
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
    /* keep ld with the matrix; the reuse branch below reads it back */
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
      /* NOTE(review): this nz is not read again; it is reassigned below before any use */
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced during the copy below, so the allocation is remembered in gmataarestore for freeing */
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* The source stream is consumed row by row: the first ld[i] values of row i go to Ao, the next
       Ad->i[i+1]-Ad->i[i] values go to Ad, and the rest of the row goes to Ao.  The loop fuses the
       rest of row i-1 (its Ao entries not counted in ld[i-1]) with the first ld[i] values of row i
       into a single copy; the first and last rows are handled separately around the loop. */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
    /* step back to the last row index; the result is only used when there is at least one local row */
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it it is not scalable (each processor
402 has an order N integer array but is fast to acess.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value into the row of the diagonal block
   that the enclosing function is currently working on.

   Macro arguments:
+  row        - local row index (used for reallocation and to store the updated row length)
.  col        - column index searched for in rp1[]
.  value      - the value to store
.  addv       - ADD_VALUES accumulates into an existing entry; anything else overwrites it
-  orow, ocol - row/column reported in the error message only

   The macro is not self-contained: it reads and updates variables of the enclosing scope
   (rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, ii, N, nonew, ignorezeroentries,
   A, a, am, aa, ai, aj, aimax, ailen) and may return from the enclosing function via SETERRQ2.

   Steps:
   1. The search window [low1,high1) is reset on one side depending on whether col is at most the
      previously searched column, then bisected until it holds at most 5 entries and scanned linearly.
   2. If col is found the value is added or overwritten and the macro is done.
   3. Otherwise nothing is inserted when the value is 0.0 and ignorezeroentries is set, or when
      nonew == 1; nonew == -1 raises an error.
   4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up
      by one, the new entry is stored at position _i, and a->nz and A->nonzerostate are incremented.
   5. In every non-error case ailen[row] is set to nrow1.
*/
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value into the row of the off-diagonal
   block that the enclosing function is currently working on.

   Macro arguments:
+  row        - local row index (used for reallocation and to store the updated row length)
.  col        - column index searched for in rp2[]
.  value      - the value to store
.  addv       - ADD_VALUES accumulates into an existing entry; anything else overwrites it
-  orow, ocol - row/column reported in the error message only

   The macro is not self-contained: it reads and updates variables of the enclosing scope
   (rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, ii, N, nonew, ignorezeroentries,
   B, b, bm, ba, bi, bj, bimax, bilen) and may return from the enclosing function via SETERRQ2.

   Steps:
   1. The search window [low2,high2) is reset on one side depending on whether col is at most the
      previously searched column, then bisected until it holds at most 5 entries and scanned linearly.
   2. If col is found the value is added or overwritten and the macro is done.
   3. Otherwise nothing is inserted when the value is 0.0 and ignorezeroentries is set, or when
      nonew == 1; nonew == -1 raises an error.
   4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up
      by one, the new entry is stored at position _i, and b->nz and B->nonzerostate are incremented.
   5. In every non-error case bilen[row] is set to nrow2.
*/
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds a logically dense m-by-n block of values.

   Input Parameters:
+  mat  - the MPIAIJ matrix
.  m,im - number of rows and their global indices; negative indices are skipped
.  n,in - number of columns and their global indices; negative indices are skipped
.  v    - the values, read as v[i*n+j] when aij->roworiented is set and as v[i+j*m] otherwise
-  addv - ADD_VALUES or another insert mode, passed on to the block macros and the stash

   Notes:
   Rows in [rstart,rend) are handled locally: columns in [cstart,cend) go to the diagonal block
   through MatSetValues_SeqAIJ_A_Private(), all other columns go to the off-diagonal block
   through MatSetValues_SeqAIJ_B_Private().  Rows outside that range are placed in mat->stash
   unless aij->donotstash is set; it is an error if mat->nooffprocentries is set.

   Many locals below exist only because the two macros reference them by name.
*/
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the per-row search state used by the A (suffix 1) and B (suffix 2) macros */
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
        /* adding an exact zero is a no-op when zero entries are ignored */
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
          /* off-diagonal block: once the matrix was assembled the global column is translated through
             colmap (entries stored shifted by one, so a result below 0 means "not present") */
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
            /* unknown column and B accepts new nonzeros: disassemble and continue with the global column index */
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
              /* NOTE(review): ba was already refreshed a few lines above; this second assignment is redundant */
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
      /* row owned by another process: stash the whole row of the input block (strided by m when column oriented) */
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselfs, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of off process part of matrix zeroing removed columns*/
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added in yy until the next line, */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1155 #if defined(PETSC_HAVE_ELEMENTAL)
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1157 #endif
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1163 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1166   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1167   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1168   PetscErrorCode ierr;
1169   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1170   int            fd;
1171   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1172   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1173   PetscScalar    *column_values;
1174   PetscInt       message_count,flowcontrolcount;
1175   FILE           *file;
1176 
1177   PetscFunctionBegin;
1178   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1180   nz   = A->nz + B->nz;
1181   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1182   if (!rank) {
1183     header[0] = MAT_FILE_CLASSID;
1184     header[1] = mat->rmap->N;
1185     header[2] = mat->cmap->N;
1186 
1187     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     /* get largest number of rows any processor has */
1190     rlen  = mat->rmap->n;
1191     range = mat->rmap->range;
1192     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1193   } else {
1194     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195     rlen = mat->rmap->n;
1196   }
1197 
1198   /* load up the local row counts */
1199   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1200   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1201 
1202   /* store the row lengths to the file */
1203   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1204   if (!rank) {
1205     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     for (i=1; i<size; i++) {
1207       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1208       rlen = range[i+1] - range[i];
1209       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1217   }
1218   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1219 
1220   /* load up the local column indices */
1221   nzmax = nz; /* th processor needs space a largest processor needs */
1222   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1224   cnt   = 0;
1225   for (i=0; i<mat->rmap->n; i++) {
1226     for (j=B->i[i]; j<B->i[i+1]; j++) {
1227       if ((col = garray[B->j[j]]) > cstart) break;
1228       column_indices[cnt++] = col;
1229     }
1230     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1231     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1232   }
1233   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1234 
1235   /* store the column indices to the file */
1236   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1237   if (!rank) {
1238     MPI_Status status;
1239     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1240     for (i=1; i<size; i++) {
1241       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1242       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1243       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1244       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1250     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1255 
1256   /* load up the local column values */
1257   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1258   cnt  = 0;
1259   for (i=0; i<mat->rmap->n; i++) {
1260     for (j=B->i[i]; j<B->i[i+1]; j++) {
1261       if (garray[B->j[j]] > cstart) break;
1262       column_values[cnt++] = B->a[j];
1263     }
1264     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1265     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1266   }
1267   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1268 
1269   /* store the column values to the file */
1270   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1271   if (!rank) {
1272     MPI_Status status;
1273     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1274     for (i=1; i<size; i++) {
1275       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1276       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1277       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1278       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1280     }
1281     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1282   } else {
1283     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1284     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1286     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1287   }
1288   ierr = PetscFree(column_values);CHKERRQ(ierr);
1289 
1290   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1291   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #include <petscdraw.h>
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1298 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1299 {
1300   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1301   PetscErrorCode    ierr;
1302   PetscMPIInt       rank = aij->rank,size = aij->size;
1303   PetscBool         isdraw,iascii,isbinary;
1304   PetscViewer       sviewer;
1305   PetscViewerFormat format;
1306 
1307   PetscFunctionBegin;
1308   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1309   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1311   if (iascii) {
1312     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1313     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1314       MatInfo   info;
1315       PetscBool inodes;
1316 
1317       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1319       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1321       if (!inodes) {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1324       } else {
1325         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1326                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1327       }
1328       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1330       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1335       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1338       PetscInt inodecount,inodelimit,*inodes;
1339       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1340       if (inodes) {
1341         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1342       } else {
1343         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1344       }
1345       PetscFunctionReturn(0);
1346     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1347       PetscFunctionReturn(0);
1348     }
1349   } else if (isbinary) {
1350     if (size == 1) {
1351       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1352       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1355     }
1356     PetscFunctionReturn(0);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1361     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1362     if (isnull) PetscFunctionReturn(0);
1363   }
1364 
1365   {
1366     /* assemble the entire matrix onto first processor. */
1367     Mat        A;
1368     Mat_SeqAIJ *Aloc;
1369     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1370     MatScalar  *a;
1371 
1372     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1373     if (!rank) {
1374       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1375     } else {
1376       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1377     }
1378     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1379     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1380     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1381     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1382     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1383 
1384     /* copy over the A part */
1385     Aloc = (Mat_SeqAIJ*)aij->A->data;
1386     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1387     row  = mat->rmap->rstart;
1388     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++;
1392       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1393     }
1394     aj = Aloc->j;
1395     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1396 
1397     /* copy over the B part */
1398     Aloc = (Mat_SeqAIJ*)aij->B->data;
1399     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1400     row  = mat->rmap->rstart;
1401     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1402     ct   = cols;
1403     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1408     }
1409     ierr = PetscFree(ct);CHKERRQ(ierr);
1410     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412     /*
1413        Everyone has to call to draw the matrix since the graphics waits are
1414        synchronized across all processors that share the PetscDraw object
1415     */
1416     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1417     if (!rank) {
1418       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1419       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1420     }
1421     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1422     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1423     ierr = MatDestroy(&A);CHKERRQ(ierr);
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_MPIAIJ"
1430 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1431 {
1432   PetscErrorCode ierr;
1433   PetscBool      iascii,isdraw,issocket,isbinary;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1438   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1440   if (iascii || isdraw || isbinary || issocket) {
1441     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1442   }
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSOR_MPIAIJ"
1448 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1449 {
1450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1451   PetscErrorCode ierr;
1452   Vec            bb1 = 0;
1453   PetscBool      hasop;
1454 
1455   PetscFunctionBegin;
1456   if (flag == SOR_APPLY_UPPER) {
1457     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1458     PetscFunctionReturn(0);
1459   }
1460 
1461   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1462     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1463   }
1464 
1465   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1466     if (flag & SOR_ZERO_INITIAL_GUESS) {
1467       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1468       its--;
1469     }
1470 
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503     while (its--) {
1504       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506 
1507       /* update rhs: bb1 = bb - B*x */
1508       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1509       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1510 
1511       /* local sweep */
1512       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1513     }
1514   } else if (flag & SOR_EISENSTAT) {
1515     Vec xx1;
1516 
1517     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1518     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1519 
1520     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522     if (!mat->diag) {
1523       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1524       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1525     }
1526     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1527     if (hasop) {
1528       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1529     } else {
1530       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1531     }
1532     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1533 
1534     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1535 
1536     /* local sweep */
1537     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1538     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1539     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1540   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1541 
1542   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1543 
1544   matin->errortype = mat->A->errortype;
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatPermute_MPIAIJ"
1550 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1551 {
1552   Mat            aA,aB,Aperm;
1553   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1554   PetscScalar    *aa,*ba;
1555   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1556   PetscSF        rowsf,sf;
1557   IS             parcolp = NULL;
1558   PetscBool      done;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1563   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1564   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1565   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1566 
1567   /* Invert row permutation to find out where my rows should go */
1568   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1569   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1570   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1571   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1572   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1574 
1575   /* Invert column permutation to find out where my columns should go */
1576   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1577   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1578   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1579   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1580   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1581   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1582   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1583 
1584   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1585   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1586   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1587 
1588   /* Find out where my gcols should go */
1589   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1590   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1591   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1592   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1593   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1594   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1595   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1596   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1597 
1598   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1599   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1600   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1601   for (i=0; i<m; i++) {
1602     PetscInt row = rdest[i],rowner;
1603     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1604     for (j=ai[i]; j<ai[i+1]; j++) {
1605       PetscInt cowner,col = cdest[aj[j]];
1606       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1607       if (rowner == cowner) dnnz[i]++;
1608       else onnz[i]++;
1609     }
1610     for (j=bi[i]; j<bi[i+1]; j++) {
1611       PetscInt cowner,col = gcdest[bj[j]];
1612       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1613       if (rowner == cowner) dnnz[i]++;
1614       else onnz[i]++;
1615     }
1616   }
1617   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1618   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1619   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1620   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1621   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1622 
1623   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1624   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1625   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1626   for (i=0; i<m; i++) {
1627     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1628     PetscInt j0,rowlen;
1629     rowlen = ai[i+1] - ai[i];
1630     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1631       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1632       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1633     }
1634     rowlen = bi[i+1] - bi[i];
1635     for (j0=j=0; j<rowlen; j0=j) {
1636       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1637       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1638     }
1639   }
1640   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1643   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1644   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1645   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1646   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1648   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1649   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1650   *B = Aperm;
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1656 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1657 {
1658   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1659   PetscErrorCode ierr;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1663   if (ghosts) *ghosts = aij->garray;
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1669 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1670 {
1671   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1672   Mat            A    = mat->A,B = mat->B;
1673   PetscErrorCode ierr;
1674   PetscReal      isend[5],irecv[5];
1675 
1676   PetscFunctionBegin;
1677   info->block_size = 1.0;
1678   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1679 
1680   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1681   isend[3] = info->memory;  isend[4] = info->mallocs;
1682 
1683   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1684 
1685   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1686   isend[3] += info->memory;  isend[4] += info->mallocs;
1687   if (flag == MAT_LOCAL) {
1688     info->nz_used      = isend[0];
1689     info->nz_allocated = isend[1];
1690     info->nz_unneeded  = isend[2];
1691     info->memory       = isend[3];
1692     info->mallocs      = isend[4];
1693   } else if (flag == MAT_GLOBAL_MAX) {
1694     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1695 
1696     info->nz_used      = irecv[0];
1697     info->nz_allocated = irecv[1];
1698     info->nz_unneeded  = irecv[2];
1699     info->memory       = irecv[3];
1700     info->mallocs      = irecv[4];
1701   } else if (flag == MAT_GLOBAL_SUM) {
1702     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1703 
1704     info->nz_used      = irecv[0];
1705     info->nz_allocated = irecv[1];
1706     info->nz_unneeded  = irecv[2];
1707     info->memory       = irecv[3];
1708     info->mallocs      = irecv[4];
1709   }
1710   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1711   info->fill_ratio_needed = 0;
1712   info->factor_mallocs    = 0;
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatSetOption_MPIAIJ"
1718 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1719 {
1720   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   switch (op) {
1725   case MAT_NEW_NONZERO_LOCATIONS:
1726   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1727   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1728   case MAT_KEEP_NONZERO_PATTERN:
1729   case MAT_NEW_NONZERO_LOCATION_ERR:
1730   case MAT_USE_INODES:
1731   case MAT_IGNORE_ZERO_ENTRIES:
1732     MatCheckPreallocated(A,1);
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_ROW_ORIENTED:
1737     MatCheckPreallocated(A,1);
1738     a->roworiented = flg;
1739 
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_NEW_DIAGONALS:
1744     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1745     break;
1746   case MAT_IGNORE_OFF_PROC_ENTRIES:
1747     a->donotstash = flg;
1748     break;
1749   case MAT_SPD:
1750     A->spd_set = PETSC_TRUE;
1751     A->spd     = flg;
1752     if (flg) {
1753       A->symmetric                  = PETSC_TRUE;
1754       A->structurally_symmetric     = PETSC_TRUE;
1755       A->symmetric_set              = PETSC_TRUE;
1756       A->structurally_symmetric_set = PETSC_TRUE;
1757     }
1758     break;
1759   case MAT_SYMMETRIC:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_STRUCTURALLY_SYMMETRIC:
1764     MatCheckPreallocated(A,1);
1765     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1766     break;
1767   case MAT_HERMITIAN:
1768     MatCheckPreallocated(A,1);
1769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1770     break;
1771   case MAT_SYMMETRY_ETERNAL:
1772     MatCheckPreallocated(A,1);
1773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1774     break;
1775   default:
1776     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1777   }
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatGetRow_MPIAIJ"
1783 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1784 {
1785   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1786   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1787   PetscErrorCode ierr;
1788   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1789   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1790   PetscInt       *cmap,*idx_p;
1791 
1792   PetscFunctionBegin;
1793   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1794   mat->getrowactive = PETSC_TRUE;
1795 
1796   if (!mat->rowvalues && (idx || v)) {
1797     /*
1798         allocate enough space to hold information from the longest row.
1799     */
1800     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1801     PetscInt   max = 1,tmp;
1802     for (i=0; i<matin->rmap->n; i++) {
1803       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1804       if (max < tmp) max = tmp;
1805     }
1806     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1807   }
1808 
1809   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1810   lrow = row - rstart;
1811 
1812   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1813   if (!v)   {pvA = 0; pvB = 0;}
1814   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1815   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1816   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1817   nztot = nzA + nzB;
1818 
1819   cmap = mat->garray;
1820   if (v  || idx) {
1821     if (nztot) {
1822       /* Sort by increasing column numbers, assuming A and B already sorted */
1823       PetscInt imark = -1;
1824       if (v) {
1825         *v = v_p = mat->rowvalues;
1826         for (i=0; i<nzB; i++) {
1827           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1828           else break;
1829         }
1830         imark = i;
1831         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1832         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1833       }
1834       if (idx) {
1835         *idx = idx_p = mat->rowindices;
1836         if (imark > -1) {
1837           for (i=0; i<imark; i++) {
1838             idx_p[i] = cmap[cworkB[i]];
1839           }
1840         } else {
1841           for (i=0; i<nzB; i++) {
1842             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1843             else break;
1844           }
1845           imark = i;
1846         }
1847         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1848         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1849       }
1850     } else {
1851       if (idx) *idx = 0;
1852       if (v)   *v   = 0;
1853     }
1854   }
1855   *nz  = nztot;
1856   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1857   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1863 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1864 {
1865   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1866 
1867   PetscFunctionBegin;
1868   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1869   aij->getrowactive = PETSC_FALSE;
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatNorm_MPIAIJ"
1875 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1876 {
1877   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1878   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1879   PetscErrorCode ierr;
1880   PetscInt       i,j,cstart = mat->cmap->rstart;
1881   PetscReal      sum = 0.0;
1882   MatScalar      *v;
1883 
1884   PetscFunctionBegin;
1885   if (aij->size == 1) {
1886     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1887   } else {
1888     if (type == NORM_FROBENIUS) {
1889       v = amat->a;
1890       for (i=0; i<amat->nz; i++) {
1891         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1892       }
1893       v = bmat->a;
1894       for (i=0; i<bmat->nz; i++) {
1895         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1896       }
1897       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       *norm = PetscSqrtReal(*norm);
1899       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1900     } else if (type == NORM_1) { /* max column norm */
1901       PetscReal *tmp,*tmp2;
1902       PetscInt  *jj,*garray = aij->garray;
1903       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1904       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1905       *norm = 0.0;
1906       v     = amat->a; jj = amat->j;
1907       for (j=0; j<amat->nz; j++) {
1908         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1909       }
1910       v = bmat->a; jj = bmat->j;
1911       for (j=0; j<bmat->nz; j++) {
1912         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1913       }
1914       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1915       for (j=0; j<mat->cmap->N; j++) {
1916         if (tmp2[j] > *norm) *norm = tmp2[j];
1917       }
1918       ierr = PetscFree(tmp);CHKERRQ(ierr);
1919       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1920       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1921     } else if (type == NORM_INFINITY) { /* max row norm */
1922       PetscReal ntemp = 0.0;
1923       for (j=0; j<aij->A->rmap->n; j++) {
1924         v   = amat->a + amat->i[j];
1925         sum = 0.0;
1926         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1927           sum += PetscAbsScalar(*v); v++;
1928         }
1929         v = bmat->a + bmat->i[j];
1930         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1931           sum += PetscAbsScalar(*v); v++;
1932         }
1933         if (sum > ntemp) ntemp = sum;
1934       }
1935       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1937     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatTranspose_MPIAIJ"
1944 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1945 {
1946   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1947   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1948   PetscErrorCode ierr;
1949   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1950   PetscInt       cstart = A->cmap->rstart,ncol;
1951   Mat            B;
1952   MatScalar      *array;
1953 
1954   PetscFunctionBegin;
1955   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1956 
1957   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1958   ai = Aloc->i; aj = Aloc->j;
1959   bi = Bloc->i; bj = Bloc->j;
1960   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1961     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1962     PetscSFNode          *oloc;
1963     PETSC_UNUSED PetscSF sf;
1964 
1965     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1966     /* compute d_nnz for preallocation */
1967     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1968     for (i=0; i<ai[ma]; i++) {
1969       d_nnz[aj[i]]++;
1970       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1971     }
1972     /* compute local off-diagonal contributions */
1973     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1974     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1975     /* map those to global */
1976     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1977     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1978     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1979     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1980     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1982     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1983 
1984     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1985     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1986     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1987     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1988     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1989     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1990   } else {
1991     B    = *matout;
1992     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1993     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1994   }
1995 
1996   /* copy over the A part */
1997   array = Aloc->a;
1998   row   = A->rmap->rstart;
1999   for (i=0; i<ma; i++) {
2000     ncol = ai[i+1]-ai[i];
2001     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2002     row++;
2003     array += ncol; aj += ncol;
2004   }
2005   aj = Aloc->j;
2006   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2007 
2008   /* copy over the B part */
2009   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2010   array = Bloc->a;
2011   row   = A->rmap->rstart;
2012   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2013   cols_tmp = cols;
2014   for (i=0; i<mb; i++) {
2015     ncol = bi[i+1]-bi[i];
2016     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2017     row++;
2018     array += ncol; cols_tmp += ncol;
2019   }
2020   ierr = PetscFree(cols);CHKERRQ(ierr);
2021 
2022   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2025     *matout = B;
2026   } else {
2027     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2028   }
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2034 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2035 {
2036   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2037   Mat            a    = aij->A,b = aij->B;
2038   PetscErrorCode ierr;
2039   PetscInt       s1,s2,s3;
2040 
2041   PetscFunctionBegin;
2042   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2043   if (rr) {
2044     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2045     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2046     /* Overlap communication with computation. */
2047     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2048   }
2049   if (ll) {
2050     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2051     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2052     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2053   }
2054   /* scale  the diagonal block */
2055   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2056 
2057   if (rr) {
2058     /* Do a scatter end and then right scale the off-diagonal block */
2059     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2060     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2061   }
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2067 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2068 {
2069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatEqual_MPIAIJ"
2079 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2080 {
2081   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2082   Mat            a,b,c,d;
2083   PetscBool      flg;
2084   PetscErrorCode ierr;
2085 
2086   PetscFunctionBegin;
2087   a = matA->A; b = matA->B;
2088   c = matB->A; d = matB->B;
2089 
2090   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2091   if (flg) {
2092     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2093   }
2094   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatCopy_MPIAIJ"
2100 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2101 {
2102   PetscErrorCode ierr;
2103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2104   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2105 
2106   PetscFunctionBegin;
2107   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2108   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2109     /* because of the column compression in the off-processor part of the matrix a->B,
2110        the number of columns in a->B and b->B may be different, hence we cannot call
2111        the MatCopy() directly on the two parts. If need be, we can provide a more
2112        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2113        then copying the submatrices */
2114     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2115   } else {
2116     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2117     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetUp_MPIAIJ"
2124 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2125 {
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*
2134    Computes the number of nonzeros per row needed for preallocation when X and Y
2135    have different nonzero structure.
2136 */
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2139 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2140 {
2141   PetscInt       i,j,k,nzx,nzy;
2142 
2143   PetscFunctionBegin;
2144   /* Set the number of nonzeros in the new matrix */
2145   for (i=0; i<m; i++) {
2146     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2147     nzx = xi[i+1] - xi[i];
2148     nzy = yi[i+1] - yi[i];
2149     nnz[i] = 0;
2150     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2151       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2152       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2153       nnz[i]++;
2154     }
2155     for (; k<nzy; k++) nnz[i]++;
2156   }
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2163 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2164 {
2165   PetscErrorCode ierr;
2166   PetscInt       m = Y->rmap->N;
2167   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2168   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatAXPY_MPIAIJ"
2177 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2178 {
2179   PetscErrorCode ierr;
2180   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2181   PetscBLASInt   bnz,one=1;
2182   Mat_SeqAIJ     *x,*y;
2183 
2184   PetscFunctionBegin;
2185   if (str == SAME_NONZERO_PATTERN) {
2186     PetscScalar alpha = a;
2187     x    = (Mat_SeqAIJ*)xx->A->data;
2188     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2189     y    = (Mat_SeqAIJ*)yy->A->data;
2190     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2191     x    = (Mat_SeqAIJ*)xx->B->data;
2192     y    = (Mat_SeqAIJ*)yy->B->data;
2193     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2194     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2195     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2196   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2197     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2198   } else {
2199     Mat      B;
2200     PetscInt *nnz_d,*nnz_o;
2201     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2202     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2203     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2204     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2205     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2206     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2207     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2209     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2210     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2211     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2212     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2214     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatConjugate_MPIAIJ"
2223 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2224 {
2225 #if defined(PETSC_USE_COMPLEX)
2226   PetscErrorCode ierr;
2227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2231   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2232 #else
2233   PetscFunctionBegin;
2234 #endif
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatRealPart_MPIAIJ"
2240 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2241 {
2242   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2243   PetscErrorCode ierr;
2244 
2245   PetscFunctionBegin;
2246   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2247   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2253 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2254 {
2255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2256   PetscErrorCode ierr;
2257 
2258   PetscFunctionBegin;
2259   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2260   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2266 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2267 {
2268   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2269   PetscErrorCode ierr;
2270   PetscInt       i,*idxb = 0;
2271   PetscScalar    *va,*vb;
2272   Vec            vtmp;
2273 
2274   PetscFunctionBegin;
2275   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2276   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2277   if (idx) {
2278     for (i=0; i<A->rmap->n; i++) {
2279       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2280     }
2281   }
2282 
2283   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2284   if (idx) {
2285     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2286   }
2287   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2288   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2289 
2290   for (i=0; i<A->rmap->n; i++) {
2291     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2292       va[i] = vb[i];
2293       if (idx) idx[i] = a->garray[idxb[i]];
2294     }
2295   }
2296 
2297   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2299   ierr = PetscFree(idxb);CHKERRQ(ierr);
2300   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2306 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2307 {
2308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2309   PetscErrorCode ierr;
2310   PetscInt       i,*idxb = 0;
2311   PetscScalar    *va,*vb;
2312   Vec            vtmp;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2316   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2317   if (idx) {
2318     for (i=0; i<A->cmap->n; i++) {
2319       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2320     }
2321   }
2322 
2323   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2324   if (idx) {
2325     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2326   }
2327   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2328   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2329 
2330   for (i=0; i<A->rmap->n; i++) {
2331     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2332       va[i] = vb[i];
2333       if (idx) idx[i] = a->garray[idxb[i]];
2334     }
2335   }
2336 
2337   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2339   ierr = PetscFree(idxb);CHKERRQ(ierr);
2340   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2346 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2347 {
2348   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2349   PetscInt       n      = A->rmap->n;
2350   PetscInt       cstart = A->cmap->rstart;
2351   PetscInt       *cmap  = mat->garray;
2352   PetscInt       *diagIdx, *offdiagIdx;
2353   Vec            diagV, offdiagV;
2354   PetscScalar    *a, *diagA, *offdiagA;
2355   PetscInt       r;
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2361   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2363   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2364   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2365   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2366   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2367   for (r = 0; r < n; ++r) {
2368     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2369       a[r]   = diagA[r];
2370       idx[r] = cstart + diagIdx[r];
2371     } else {
2372       a[r]   = offdiagA[r];
2373       idx[r] = cmap[offdiagIdx[r]];
2374     }
2375   }
2376   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2378   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2379   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2380   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2381   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2387 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2388 {
2389   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2390   PetscInt       n      = A->rmap->n;
2391   PetscInt       cstart = A->cmap->rstart;
2392   PetscInt       *cmap  = mat->garray;
2393   PetscInt       *diagIdx, *offdiagIdx;
2394   Vec            diagV, offdiagV;
2395   PetscScalar    *a, *diagA, *offdiagA;
2396   PetscInt       r;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2402   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2403   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2404   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   for (r = 0; r < n; ++r) {
2409     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2410       a[r]   = diagA[r];
2411       idx[r] = cstart + diagIdx[r];
2412     } else {
2413       a[r]   = offdiagA[r];
2414       idx[r] = cmap[offdiagIdx[r]];
2415     }
2416   }
2417   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2419   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2420   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2421   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2422   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2428 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2429 {
2430   PetscErrorCode ierr;
2431   Mat            *dummy;
2432 
2433   PetscFunctionBegin;
2434   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2435   *newmat = *dummy;
2436   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2442 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2443 {
2444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2445   PetscErrorCode ierr;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2449   A->errortype = a->A->errortype;
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2455 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2456 {
2457   PetscErrorCode ierr;
2458   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2459 
2460   PetscFunctionBegin;
2461   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2462   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2463   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2464   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2470 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2471 {
2472   PetscFunctionBegin;
2473   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2474   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2480 /*@
2481    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2482 
2483    Collective on Mat
2484 
2485    Input Parameters:
2486 +    A - the matrix
2487 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2488 
2489  Level: advanced
2490 
2491 @*/
2492 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2493 {
2494   PetscErrorCode       ierr;
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2503 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2504 {
2505   PetscErrorCode       ierr;
2506   PetscBool            sc = PETSC_FALSE,flg;
2507 
2508   PetscFunctionBegin;
2509   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2510   ierr = PetscObjectOptionsBegin((PetscObject)A);
2511     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2512     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2513     if (flg) {
2514       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2515     }
2516   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatShift_MPIAIJ"
2522 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2523 {
2524   PetscErrorCode ierr;
2525   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2526   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2527 
2528   PetscFunctionBegin;
2529   if (!Y->preallocated) {
2530     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2531   } else if (!aij->nz) {
2532     PetscInt nonew = aij->nonew;
2533     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2534     aij->nonew = nonew;
2535   }
2536   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2542 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2543 {
2544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2545   PetscErrorCode ierr;
2546 
2547   PetscFunctionBegin;
2548   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2549   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2550   if (d) {
2551     PetscInt rstart;
2552     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2553     *d += rstart;
2554 
2555   }
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 
2560 /* -------------------------------------------------------------------*/
2561 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2562                                        MatGetRow_MPIAIJ,
2563                                        MatRestoreRow_MPIAIJ,
2564                                        MatMult_MPIAIJ,
2565                                 /* 4*/ MatMultAdd_MPIAIJ,
2566                                        MatMultTranspose_MPIAIJ,
2567                                        MatMultTransposeAdd_MPIAIJ,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                 /*10*/ 0,
2572                                        0,
2573                                        0,
2574                                        MatSOR_MPIAIJ,
2575                                        MatTranspose_MPIAIJ,
2576                                 /*15*/ MatGetInfo_MPIAIJ,
2577                                        MatEqual_MPIAIJ,
2578                                        MatGetDiagonal_MPIAIJ,
2579                                        MatDiagonalScale_MPIAIJ,
2580                                        MatNorm_MPIAIJ,
2581                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2582                                        MatAssemblyEnd_MPIAIJ,
2583                                        MatSetOption_MPIAIJ,
2584                                        MatZeroEntries_MPIAIJ,
2585                                 /*24*/ MatZeroRows_MPIAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                 /*29*/ MatSetUp_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*34*/ MatDuplicate_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                 /*39*/ MatAXPY_MPIAIJ,
2601                                        MatGetSubMatrices_MPIAIJ,
2602                                        MatIncreaseOverlap_MPIAIJ,
2603                                        MatGetValues_MPIAIJ,
2604                                        MatCopy_MPIAIJ,
2605                                 /*44*/ MatGetRowMax_MPIAIJ,
2606                                        MatScale_MPIAIJ,
2607                                        MatShift_MPIAIJ,
2608                                        MatDiagonalSet_MPIAIJ,
2609                                        MatZeroRowsColumns_MPIAIJ,
2610                                 /*49*/ MatSetRandom_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2616                                        0,
2617                                        MatSetUnfactored_MPIAIJ,
2618                                        MatPermute_MPIAIJ,
2619                                        0,
2620                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2621                                        MatDestroy_MPIAIJ,
2622                                        MatView_MPIAIJ,
2623                                        0,
2624                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2625                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2626                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2631                                        MatGetRowMinAbs_MPIAIJ,
2632                                        0,
2633                                        MatSetColoring_MPIAIJ,
2634                                        0,
2635                                        MatSetValuesAdifor_MPIAIJ,
2636                                 /*75*/ MatFDColoringApply_AIJ,
2637                                        MatSetFromOptions_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        MatFindZeroDiagonals_MPIAIJ,
2641                                 /*80*/ 0,
2642                                        0,
2643                                        0,
2644                                 /*83*/ MatLoad_MPIAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2651                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2652                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2653                                        MatPtAP_MPIAIJ_MPIAIJ,
2654                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2655                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*99*/ 0,
2661                                        0,
2662                                        0,
2663                                        MatConjugate_MPIAIJ,
2664                                        0,
2665                                 /*104*/MatSetValuesRow_MPIAIJ,
2666                                        MatRealPart_MPIAIJ,
2667                                        MatImaginaryPart_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                 /*109*/0,
2671                                        0,
2672                                        MatGetRowMin_MPIAIJ,
2673                                        0,
2674                                        MatMissingDiagonal_MPIAIJ,
2675                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2676                                        0,
2677                                        MatGetGhosts_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                 /*119*/0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        MatGetMultiProcBlock_MPIAIJ,
2685                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2686                                        MatGetColumnNorms_MPIAIJ,
2687                                        MatInvertBlockDiagonal_MPIAIJ,
2688                                        0,
2689                                        MatGetSubMatricesMPI_MPIAIJ,
2690                                 /*129*/0,
2691                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2692                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2693                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2694                                        0,
2695                                 /*134*/0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*139*/0,
2701                                        0,
2702                                        0,
2703                                        MatFDColoringSetUp_MPIXAIJ,
2704                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2705                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2706 };
2707 
2708 /* ----------------------------------------------------------------------------------------*/
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2712 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2713 {
2714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2725 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2726 {
2727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2732   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2738 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2739 {
2740   Mat_MPIAIJ     *b;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2745   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2746   b = (Mat_MPIAIJ*)B->data;
2747 
2748   if (!B->preallocated) {
2749     /* Explicitly create 2 MATSEQAIJ matrices. */
2750     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2751     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2752     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2753     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2754     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2755     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2756     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2757     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2758     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2759     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2760   }
2761 
2762   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2763   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2764   B->preallocated = PETSC_TRUE;
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2770 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2771 {
2772   Mat            mat;
2773   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2774   PetscErrorCode ierr;
2775 
2776   PetscFunctionBegin;
2777   *newmat = 0;
2778   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2779   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2780   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2781   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2782   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2783   a       = (Mat_MPIAIJ*)mat->data;
2784 
2785   mat->factortype   = matin->factortype;
2786   mat->assembled    = PETSC_TRUE;
2787   mat->insertmode   = NOT_SET_VALUES;
2788   mat->preallocated = PETSC_TRUE;
2789 
2790   a->size         = oldmat->size;
2791   a->rank         = oldmat->rank;
2792   a->donotstash   = oldmat->donotstash;
2793   a->roworiented  = oldmat->roworiented;
2794   a->rowindices   = 0;
2795   a->rowvalues    = 0;
2796   a->getrowactive = PETSC_FALSE;
2797 
2798   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2799   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2800 
2801   if (oldmat->colmap) {
2802 #if defined(PETSC_USE_CTABLE)
2803     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2804 #else
2805     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2806     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2807     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2808 #endif
2809   } else a->colmap = 0;
2810   if (oldmat->garray) {
2811     PetscInt len;
2812     len  = oldmat->B->cmap->n;
2813     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2814     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2815     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2816   } else a->garray = 0;
2817 
2818   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2819   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2820   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2821   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2822   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2823   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2824   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2825   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2826   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2827   *newmat = mat;
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatLoad_MPIAIJ"
2835 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2836 {
2837   PetscScalar    *vals,*svals;
2838   MPI_Comm       comm;
2839   PetscErrorCode ierr;
2840   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2841   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2842   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2843   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2844   PetscInt       cend,cstart,n,*rowners;
2845   int            fd;
2846   PetscInt       bs = newMat->rmap->bs;
2847 
2848   PetscFunctionBegin;
2849   /* force binary viewer to load .info file if it has not yet done so */
2850   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2851   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2852   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2853   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2854   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2855   if (!rank) {
2856     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2857     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2858   }
2859 
2860   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2861   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2862   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2863   if (bs < 0) bs = 1;
2864 
2865   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2866   M    = header[1]; N = header[2];
2867 
2868   /* If global sizes are set, check if they are consistent with that given in the file */
2869   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2870   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2871 
2872   /* determine ownership of all (block) rows */
2873   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2874   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2875   else m = newMat->rmap->n; /* Set by user */
2876 
2877   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2878   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2879 
2880   /* First process needs enough room for process with most rows */
2881   if (!rank) {
2882     mmax = rowners[1];
2883     for (i=2; i<=size; i++) {
2884       mmax = PetscMax(mmax, rowners[i]);
2885     }
2886   } else mmax = -1;             /* unused, but compilers complain */
2887 
2888   rowners[0] = 0;
2889   for (i=2; i<=size; i++) {
2890     rowners[i] += rowners[i-1];
2891   }
2892   rstart = rowners[rank];
2893   rend   = rowners[rank+1];
2894 
2895   /* distribute row lengths to all processors */
2896   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2897   if (!rank) {
2898     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2899     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2900     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2901     for (j=0; j<m; j++) {
2902       procsnz[0] += ourlens[j];
2903     }
2904     for (i=1; i<size; i++) {
2905       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2906       /* calculate the number of nonzeros on each processor */
2907       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2908         procsnz[i] += rowlengths[j];
2909       }
2910       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2911     }
2912     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2913   } else {
2914     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2915   }
2916 
2917   if (!rank) {
2918     /* determine max buffer needed and allocate it */
2919     maxnz = 0;
2920     for (i=0; i<size; i++) {
2921       maxnz = PetscMax(maxnz,procsnz[i]);
2922     }
2923     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2924 
2925     /* read in my part of the matrix column indices  */
2926     nz   = procsnz[0];
2927     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2928     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2929 
2930     /* read in every one elses and ship off */
2931     for (i=1; i<size; i++) {
2932       nz   = procsnz[i];
2933       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2934       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2935     }
2936     ierr = PetscFree(cols);CHKERRQ(ierr);
2937   } else {
2938     /* determine buffer space needed for message */
2939     nz = 0;
2940     for (i=0; i<m; i++) {
2941       nz += ourlens[i];
2942     }
2943     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2944 
2945     /* receive message of column indices*/
2946     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2947   }
2948 
2949   /* determine column ownership if matrix is not square */
2950   if (N != M) {
2951     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2952     else n = newMat->cmap->n;
2953     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2954     cstart = cend - n;
2955   } else {
2956     cstart = rstart;
2957     cend   = rend;
2958     n      = cend - cstart;
2959   }
2960 
2961   /* loop over local rows, determining number of off diagonal entries */
2962   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2963   jj   = 0;
2964   for (i=0; i<m; i++) {
2965     for (j=0; j<ourlens[i]; j++) {
2966       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2967       jj++;
2968     }
2969   }
2970 
2971   for (i=0; i<m; i++) {
2972     ourlens[i] -= offlens[i];
2973   }
2974   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2975 
2976   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2977 
2978   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2979 
2980   for (i=0; i<m; i++) {
2981     ourlens[i] += offlens[i];
2982   }
2983 
2984   if (!rank) {
2985     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2986 
2987     /* read in my part of the matrix numerical values  */
2988     nz   = procsnz[0];
2989     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2990 
2991     /* insert into matrix */
2992     jj      = rstart;
2993     smycols = mycols;
2994     svals   = vals;
2995     for (i=0; i<m; i++) {
2996       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2997       smycols += ourlens[i];
2998       svals   += ourlens[i];
2999       jj++;
3000     }
3001 
3002     /* read in other processors and ship out */
3003     for (i=1; i<size; i++) {
3004       nz   = procsnz[i];
3005       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3006       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3007     }
3008     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3009   } else {
3010     /* receive numeric values */
3011     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3012 
3013     /* receive message of values*/
3014     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3015 
3016     /* insert into matrix */
3017     jj      = rstart;
3018     smycols = mycols;
3019     svals   = vals;
3020     for (i=0; i<m; i++) {
3021       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3022       smycols += ourlens[i];
3023       svals   += ourlens[i];
3024       jj++;
3025     }
3026   }
3027   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3028   ierr = PetscFree(vals);CHKERRQ(ierr);
3029   ierr = PetscFree(mycols);CHKERRQ(ierr);
3030   ierr = PetscFree(rowners);CHKERRQ(ierr);
3031   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 #undef __FUNCT__
3037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3038 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3039 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3040 {
3041   PetscErrorCode ierr;
3042   IS             iscol_local;
3043   PetscInt       csize;
3044 
3045   PetscFunctionBegin;
3046   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3047   if (call == MAT_REUSE_MATRIX) {
3048     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3049     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3050   } else {
3051     /* check if we are grabbing all columns*/
3052     PetscBool    isstride;
3053     PetscMPIInt  lisstride = 0,gisstride;
3054     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3055     if (isstride) {
3056       PetscInt  start,len,mstart,mlen;
3057       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3058       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3059       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3060       if (mstart == start && mlen-mstart == len) lisstride = 1;
3061     }
3062     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3063     if (gisstride) {
3064       PetscInt N;
3065       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3066       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3067       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3068       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3069     } else {
3070       PetscInt cbs;
3071       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3072       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3073       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3074     }
3075   }
3076   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3077   if (call == MAT_INITIAL_MATRIX) {
3078     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3079     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3080   }
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3085 #undef __FUNCT__
3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3087 /*
3088     Not great since it makes two copies of the submatrix, first an SeqAIJ
3089   in local and then by concatenating the local matrices the end result.
3090   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3091 
3092   Note: This requires a sequential iscol with all indices.
3093 */
3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3095 {
3096   PetscErrorCode ierr;
3097   PetscMPIInt    rank,size;
3098   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3099   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3100   PetscBool      allcolumns, colflag;
3101   Mat            M,Mreuse;
3102   MatScalar      *vwork,*aa;
3103   MPI_Comm       comm;
3104   Mat_SeqAIJ     *aij;
3105 
3106   PetscFunctionBegin;
3107   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3110 
3111   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3112   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3113   if (colflag && ncol == mat->cmap->N) {
3114     allcolumns = PETSC_TRUE;
3115     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3116   } else {
3117     allcolumns = PETSC_FALSE;
3118   }
3119   if (call ==  MAT_REUSE_MATRIX) {
3120     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3121     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3122     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3123   } else {
3124     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3125   }
3126 
3127   /*
3128       m - number of local rows
3129       n - number of columns (same on all processors)
3130       rstart - first row in new global matrix generated
3131   */
3132   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3133   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3134   if (call == MAT_INITIAL_MATRIX) {
3135     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3136     ii  = aij->i;
3137     jj  = aij->j;
3138 
3139     /*
3140         Determine the number of non-zeros in the diagonal and off-diagonal
3141         portions of the matrix in order to do correct preallocation
3142     */
3143 
3144     /* first get start and end of "diagonal" columns */
3145     if (csize == PETSC_DECIDE) {
3146       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3147       if (mglobal == n) { /* square matrix */
3148         nlocal = m;
3149       } else {
3150         nlocal = n/size + ((n % size) > rank);
3151       }
3152     } else {
3153       nlocal = csize;
3154     }
3155     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3156     rstart = rend - nlocal;
3157     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3158 
3159     /* next, compute all the lengths */
3160     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3161     olens = dlens + m;
3162     for (i=0; i<m; i++) {
3163       jend = ii[i+1] - ii[i];
3164       olen = 0;
3165       dlen = 0;
3166       for (j=0; j<jend; j++) {
3167         if (*jj < rstart || *jj >= rend) olen++;
3168         else dlen++;
3169         jj++;
3170       }
3171       olens[i] = olen;
3172       dlens[i] = dlen;
3173     }
3174     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3175     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3176     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3177     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3178     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3179     ierr = PetscFree(dlens);CHKERRQ(ierr);
3180   } else {
3181     PetscInt ml,nl;
3182 
3183     M    = *newmat;
3184     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3185     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3186     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3187     /*
3188          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3189        rather than the slower MatSetValues().
3190     */
3191     M->was_assembled = PETSC_TRUE;
3192     M->assembled     = PETSC_FALSE;
3193   }
3194   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3195   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3196   ii   = aij->i;
3197   jj   = aij->j;
3198   aa   = aij->a;
3199   for (i=0; i<m; i++) {
3200     row   = rstart + i;
3201     nz    = ii[i+1] - ii[i];
3202     cwork = jj;     jj += nz;
3203     vwork = aa;     aa += nz;
3204     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3205   }
3206 
3207   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3208   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209   *newmat = M;
3210 
3211   /* save submatrix used in processor for next request */
3212   if (call ==  MAT_INITIAL_MATRIX) {
3213     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3214     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3215   }
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3221 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3222 {
3223   PetscInt       m,cstart, cend,j,nnz,i,d;
3224   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3225   const PetscInt *JJ;
3226   PetscScalar    *values;
3227   PetscErrorCode ierr;
3228 
3229   PetscFunctionBegin;
3230   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3231 
3232   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3233   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3234   m      = B->rmap->n;
3235   cstart = B->cmap->rstart;
3236   cend   = B->cmap->rend;
3237   rstart = B->rmap->rstart;
3238 
3239   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3240 
3241 #if defined(PETSC_USE_DEBUGGING)
3242   for (i=0; i<m; i++) {
3243     nnz = Ii[i+1]- Ii[i];
3244     JJ  = J + Ii[i];
3245     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3246     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3247     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3248   }
3249 #endif
3250 
3251   for (i=0; i<m; i++) {
3252     nnz     = Ii[i+1]- Ii[i];
3253     JJ      = J + Ii[i];
3254     nnz_max = PetscMax(nnz_max,nnz);
3255     d       = 0;
3256     for (j=0; j<nnz; j++) {
3257       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3258     }
3259     d_nnz[i] = d;
3260     o_nnz[i] = nnz - d;
3261   }
3262   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3263   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3264 
3265   if (v) values = (PetscScalar*)v;
3266   else {
3267     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3268   }
3269 
3270   for (i=0; i<m; i++) {
3271     ii   = i + rstart;
3272     nnz  = Ii[i+1]- Ii[i];
3273     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3274   }
3275   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277 
3278   if (!v) {
3279     ierr = PetscFree(values);CHKERRQ(ierr);
3280   }
3281   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3287 /*@
3288    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3289    (the default parallel PETSc format).
3290 
3291    Collective on MPI_Comm
3292 
3293    Input Parameters:
3294 +  B - the matrix
3295 .  i - the indices into j for the start of each local row (starts with zero)
3296 .  j - the column indices for each local row (starts with zero)
3297 -  v - optional values in the matrix
3298 
3299    Level: developer
3300 
3301    Notes:
3302        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3303      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3304      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3305 
3306        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3307 
3308        The format which is used for the sparse matrix input, is equivalent to a
3309     row-major ordering.. i.e for the following matrix, the input data expected is
3310     as shown
3311 
3312 $        1 0 0
3313 $        2 0 3     P0
3314 $       -------
3315 $        4 5 6     P1
3316 $
3317 $     Process0 [P0]: rows_owned=[0,1]
3318 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3319 $        j =  {0,0,2}  [size = 3]
3320 $        v =  {1,2,3}  [size = 3]
3321 $
3322 $     Process1 [P1]: rows_owned=[2]
3323 $        i =  {0,3}    [size = nrow+1  = 1+1]
3324 $        j =  {0,1,2}  [size = 3]
3325 $        v =  {4,5,6}  [size = 3]
3326 
3327 .keywords: matrix, aij, compressed row, sparse, parallel
3328 
3329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3330           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3331 @*/
3332 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3333 {
3334   PetscErrorCode ierr;
3335 
3336   PetscFunctionBegin;
3337   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3343 /*@C
3344    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3345    (the default parallel PETSc format).  For good matrix assembly performance
3346    the user should preallocate the matrix storage by setting the parameters
3347    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3348    performance can be increased by more than a factor of 50.
3349 
3350    Collective on MPI_Comm
3351 
3352    Input Parameters:
3353 +  B - the matrix
3354 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3355            (same value is used for all local rows)
3356 .  d_nnz - array containing the number of nonzeros in the various rows of the
3357            DIAGONAL portion of the local submatrix (possibly different for each row)
3358            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3359            The size of this array is equal to the number of local rows, i.e 'm'.
3360            For matrices that will be factored, you must leave room for (and set)
3361            the diagonal entry even if it is zero.
3362 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3363            submatrix (same value is used for all local rows).
3364 -  o_nnz - array containing the number of nonzeros in the various rows of the
3365            OFF-DIAGONAL portion of the local submatrix (possibly different for
3366            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3367            structure. The size of this array is equal to the number
3368            of local rows, i.e 'm'.
3369 
3370    If the *_nnz parameter is given then the *_nz parameter is ignored
3371 
3372    The AIJ format (also called the Yale sparse matrix format or
3373    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3374    storage.  The stored row and column indices begin with zero.
3375    See Users-Manual: ch_mat for details.
3376 
3377    The parallel matrix is partitioned such that the first m0 rows belong to
3378    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3379    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3380 
3381    The DIAGONAL portion of the local submatrix of a processor can be defined
3382    as the submatrix which is obtained by extraction the part corresponding to
3383    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3384    first row that belongs to the processor, r2 is the last row belonging to
3385    the this processor, and c1-c2 is range of indices of the local part of a
3386    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3387    common case of a square matrix, the row and column ranges are the same and
3388    the DIAGONAL part is also square. The remaining portion of the local
3389    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3390 
3391    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3392 
3393    You can call MatGetInfo() to get information on how effective the preallocation was;
3394    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3395    You can also run with the option -info and look for messages with the string
3396    malloc in them to see if additional memory allocation was needed.
3397 
3398    Example usage:
3399 
3400    Consider the following 8x8 matrix with 34 non-zero values, that is
3401    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3402    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3403    as follows:
3404 
3405 .vb
3406             1  2  0  |  0  3  0  |  0  4
3407     Proc0   0  5  6  |  7  0  0  |  8  0
3408             9  0 10  | 11  0  0  | 12  0
3409     -------------------------------------
3410            13  0 14  | 15 16 17  |  0  0
3411     Proc1   0 18  0  | 19 20 21  |  0  0
3412             0  0  0  | 22 23  0  | 24  0
3413     -------------------------------------
3414     Proc2  25 26 27  |  0  0 28  | 29  0
3415            30  0  0  | 31 32 33  |  0 34
3416 .ve
3417 
3418    This can be represented as a collection of submatrices as:
3419 
3420 .vb
3421       A B C
3422       D E F
3423       G H I
3424 .ve
3425 
3426    Where the submatrices A,B,C are owned by proc0, D,E,F are
3427    owned by proc1, G,H,I are owned by proc2.
3428 
3429    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3430    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3431    The 'M','N' parameters are 8,8, and have the same values on all procs.
3432 
3433    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3434    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3435    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3436    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3437    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3438    matrix, ans [DF] as another SeqAIJ matrix.
3439 
3440    When d_nz, o_nz parameters are specified, d_nz storage elements are
3441    allocated for every row of the local diagonal submatrix, and o_nz
3442    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3443    One way to choose d_nz and o_nz is to use the max nonzerors per local
3444    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3445    In this case, the values of d_nz,o_nz are:
3446 .vb
3447      proc0 : dnz = 2, o_nz = 2
3448      proc1 : dnz = 3, o_nz = 2
3449      proc2 : dnz = 1, o_nz = 4
3450 .ve
3451    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3452    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3453    for proc3. i.e we are using 12+15+10=37 storage locations to store
3454    34 values.
3455 
3456    When d_nnz, o_nnz parameters are specified, the storage is specified
3457    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3458    In the above case the values for d_nnz,o_nnz are:
3459 .vb
3460      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3461      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3462      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3463 .ve
3464    Here the space allocated is sum of all the above values i.e 34, and
3465    hence pre-allocation is perfect.
3466 
3467    Level: intermediate
3468 
3469 .keywords: matrix, aij, compressed row, sparse, parallel
3470 
3471 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3472           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3473 @*/
3474 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3475 {
3476   PetscErrorCode ierr;
3477 
3478   PetscFunctionBegin;
3479   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3480   PetscValidType(B,1);
3481   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3482   PetscFunctionReturn(0);
3483 }
3484 
3485 #undef __FUNCT__
3486 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3487 /*@
3488      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3489          CSR format the local rows.
3490 
3491    Collective on MPI_Comm
3492 
3493    Input Parameters:
3494 +  comm - MPI communicator
3495 .  m - number of local rows (Cannot be PETSC_DECIDE)
3496 .  n - This value should be the same as the local size used in creating the
3497        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3498        calculated if N is given) For square matrices n is almost always m.
3499 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3500 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3501 .   i - row indices
3502 .   j - column indices
3503 -   a - matrix values
3504 
3505    Output Parameter:
3506 .   mat - the matrix
3507 
3508    Level: intermediate
3509 
3510    Notes:
3511        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3512      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3513      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3514 
3515        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3516 
3517        The format which is used for the sparse matrix input, is equivalent to a
3518     row-major ordering.. i.e for the following matrix, the input data expected is
3519     as shown
3520 
3521 $        1 0 0
3522 $        2 0 3     P0
3523 $       -------
3524 $        4 5 6     P1
3525 $
3526 $     Process0 [P0]: rows_owned=[0,1]
3527 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3528 $        j =  {0,0,2}  [size = 3]
3529 $        v =  {1,2,3}  [size = 3]
3530 $
3531 $     Process1 [P1]: rows_owned=[2]
3532 $        i =  {0,3}    [size = nrow+1  = 1+1]
3533 $        j =  {0,1,2}  [size = 3]
3534 $        v =  {4,5,6}  [size = 3]
3535 
3536 .keywords: matrix, aij, compressed row, sparse, parallel
3537 
3538 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3539           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3540 @*/
3541 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3542 {
3543   PetscErrorCode ierr;
3544 
3545   PetscFunctionBegin;
3546   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3547   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3548   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3549   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3550   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3551   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3552   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 #undef __FUNCT__
3557 #define __FUNCT__ "MatCreateAIJ"
3558 /*@C
3559    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3560    (the default parallel PETSc format).  For good matrix assembly performance
3561    the user should preallocate the matrix storage by setting the parameters
3562    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3563    performance can be increased by more than a factor of 50.
3564 
3565    Collective on MPI_Comm
3566 
3567    Input Parameters:
3568 +  comm - MPI communicator
3569 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3570            This value should be the same as the local size used in creating the
3571            y vector for the matrix-vector product y = Ax.
3572 .  n - This value should be the same as the local size used in creating the
3573        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3574        calculated if N is given) For square matrices n is almost always m.
3575 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3576 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3577 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3578            (same value is used for all local rows)
3579 .  d_nnz - array containing the number of nonzeros in the various rows of the
3580            DIAGONAL portion of the local submatrix (possibly different for each row)
3581            or NULL, if d_nz is used to specify the nonzero structure.
3582            The size of this array is equal to the number of local rows, i.e 'm'.
3583 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3584            submatrix (same value is used for all local rows).
3585 -  o_nnz - array containing the number of nonzeros in the various rows of the
3586            OFF-DIAGONAL portion of the local submatrix (possibly different for
3587            each row) or NULL, if o_nz is used to specify the nonzero
3588            structure. The size of this array is equal to the number
3589            of local rows, i.e 'm'.
3590 
3591    Output Parameter:
3592 .  A - the matrix
3593 
3594    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3595    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3596    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3597 
3598    Notes:
3599    If the *_nnz parameter is given then the *_nz parameter is ignored
3600 
3601    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3602    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3603    storage requirements for this matrix.
3604 
3605    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3606    processor then it must be used on all processors that share the object for
3607    that argument.
3608 
3609    The user MUST specify either the local or global matrix dimensions
3610    (possibly both).
3611 
3612    The parallel matrix is partitioned across processors such that the
3613    first m0 rows belong to process 0, the next m1 rows belong to
3614    process 1, the next m2 rows belong to process 2 etc.. where
3615    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3616    values corresponding to [m x N] submatrix.
3617 
3618    The columns are logically partitioned with the n0 columns belonging
3619    to 0th partition, the next n1 columns belonging to the next
3620    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3621 
3622    The DIAGONAL portion of the local submatrix on any given processor
3623    is the submatrix corresponding to the rows and columns m,n
3624    corresponding to the given processor. i.e diagonal matrix on
3625    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3626    etc. The remaining portion of the local submatrix [m x (N-n)]
3627    constitute the OFF-DIAGONAL portion. The example below better
3628    illustrates this concept.
3629 
3630    For a square global matrix we define each processor's diagonal portion
3631    to be its local rows and the corresponding columns (a square submatrix);
3632    each processor's off-diagonal portion encompasses the remainder of the
3633    local matrix (a rectangular submatrix).
3634 
3635    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3636 
3637    When calling this routine with a single process communicator, a matrix of
3638    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3639    type of communicator, use the construction mechanism:
3640      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3641 
3642    By default, this format uses inodes (identical nodes) when possible.
3643    We search for consecutive rows with the same nonzero structure, thereby
3644    reusing matrix information to achieve increased efficiency.
3645 
3646    Options Database Keys:
3647 +  -mat_no_inode  - Do not use inodes
3648 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3649 -  -mat_aij_oneindex - Internally use indexing starting at 1
3650         rather than 0.  Note that when calling MatSetValues(),
3651         the user still MUST index entries starting at 0!
3652 
3653 
3654    Example usage:
3655 
3656    Consider the following 8x8 matrix with 34 non-zero values, that is
3657    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3658    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3659    as follows:
3660 
3661 .vb
3662             1  2  0  |  0  3  0  |  0  4
3663     Proc0   0  5  6  |  7  0  0  |  8  0
3664             9  0 10  | 11  0  0  | 12  0
3665     -------------------------------------
3666            13  0 14  | 15 16 17  |  0  0
3667     Proc1   0 18  0  | 19 20 21  |  0  0
3668             0  0  0  | 22 23  0  | 24  0
3669     -------------------------------------
3670     Proc2  25 26 27  |  0  0 28  | 29  0
3671            30  0  0  | 31 32 33  |  0 34
3672 .ve
3673 
3674    This can be represented as a collection of submatrices as:
3675 
3676 .vb
3677       A B C
3678       D E F
3679       G H I
3680 .ve
3681 
3682    Where the submatrices A,B,C are owned by proc0, D,E,F are
3683    owned by proc1, G,H,I are owned by proc2.
3684 
3685    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3686    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3687    The 'M','N' parameters are 8,8, and have the same values on all procs.
3688 
3689    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3690    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3691    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3692    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3693    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3694    matrix, and [DF] as another SeqAIJ matrix.
3695 
3696    When d_nz, o_nz parameters are specified, d_nz storage elements are
3697    allocated for every row of the local diagonal submatrix, and o_nz
3698    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3699    One way to choose d_nz and o_nz is to use the max nonzeros per local
3700    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3701    In this case, the values of d_nz,o_nz are:
3702 .vb
3703      proc0 : d_nz = 2, o_nz = 2
3704      proc1 : d_nz = 3, o_nz = 2
3705      proc2 : d_nz = 1, o_nz = 4
3706 .ve
3707    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3708    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3709    for proc2. i.e we are using 12+15+10=37 storage locations to store
3710    34 values.
3711 
3712    When d_nnz, o_nnz parameters are specified, the storage is specified
3713    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3714    In the above case the values for d_nnz,o_nnz are:
3715 .vb
3716      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3717      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3718      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3719 .ve
3720    Here the space allocated is sum of all the above values i.e 34, and
3721    hence pre-allocation is perfect.
3722 
3723    Level: intermediate
3724 
3725 .keywords: matrix, aij, compressed row, sparse, parallel
3726 
3727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3728           MPIAIJ, MatCreateMPIAIJWithArrays()
3729 @*/
3730 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3731 {
3732   PetscErrorCode ierr;
3733   PetscMPIInt    size;
3734 
3735   PetscFunctionBegin;
3736   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3737   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3739   if (size > 1) {
3740     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3741     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3742   } else {
3743     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3744     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3745   }
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3751 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3752 {
3753   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3754   PetscBool      flg;
3755   PetscErrorCode ierr;
3756 
3757   PetscFunctionBegin;
3758   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3759   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
3760   if (Ad)     *Ad     = a->A;
3761   if (Ao)     *Ao     = a->B;
3762   if (colmap) *colmap = a->garray;
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3769 {
3770   PetscErrorCode ierr;
3771   PetscInt       i;
3772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3773 
3774   PetscFunctionBegin;
3775   if (coloring->ctype == IS_COLORING_GLOBAL) {
3776     ISColoringValue *allcolors,*colors;
3777     ISColoring      ocoloring;
3778 
3779     /* set coloring for diagonal portion */
3780     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3781 
3782     /* set coloring for off-diagonal portion */
3783     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3784     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3785     for (i=0; i<a->B->cmap->n; i++) {
3786       colors[i] = allcolors[a->garray[i]];
3787     }
3788     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3789     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3790     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3791     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3792   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3793     ISColoringValue *colors;
3794     PetscInt        *larray;
3795     ISColoring      ocoloring;
3796 
3797     /* set coloring for diagonal portion */
3798     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3799     for (i=0; i<a->A->cmap->n; i++) {
3800       larray[i] = i + A->cmap->rstart;
3801     }
3802     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3803     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3804     for (i=0; i<a->A->cmap->n; i++) {
3805       colors[i] = coloring->colors[larray[i]];
3806     }
3807     ierr = PetscFree(larray);CHKERRQ(ierr);
3808     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3809     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3810     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3811 
3812     /* set coloring for off-diagonal portion */
3813     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3814     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3815     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3816     for (i=0; i<a->B->cmap->n; i++) {
3817       colors[i] = coloring->colors[larray[i]];
3818     }
3819     ierr = PetscFree(larray);CHKERRQ(ierr);
3820     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3821     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3822     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3823   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3824   PetscFunctionReturn(0);
3825 }
3826 
3827 #undef __FUNCT__
3828 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3829 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3830 {
3831   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3836   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3842 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3843 {
3844   PetscErrorCode ierr;
3845   PetscInt       m,N,i,rstart,nnz,Ii;
3846   PetscInt       *indx;
3847   PetscScalar    *values;
3848 
3849   PetscFunctionBegin;
3850   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3851   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3852     PetscInt       *dnz,*onz,sum,bs,cbs;
3853 
3854     if (n == PETSC_DECIDE) {
3855       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3856     }
3857     /* Check sum(n) = N */
3858     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3859     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3860 
3861     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3862     rstart -= m;
3863 
3864     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3865     for (i=0; i<m; i++) {
3866       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3867       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3868       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3869     }
3870 
3871     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3872     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3873     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3874     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3875     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3876     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3877     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3878   }
3879 
3880   /* numeric phase */
3881   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3882   for (i=0; i<m; i++) {
3883     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3884     Ii   = i + rstart;
3885     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3886     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3887   }
3888   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3889   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatFileSplit"
3895 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3896 {
3897   PetscErrorCode    ierr;
3898   PetscMPIInt       rank;
3899   PetscInt          m,N,i,rstart,nnz;
3900   size_t            len;
3901   const PetscInt    *indx;
3902   PetscViewer       out;
3903   char              *name;
3904   Mat               B;
3905   const PetscScalar *values;
3906 
3907   PetscFunctionBegin;
3908   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3909   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3910   /* Should this be the type of the diagonal block of A? */
3911   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3912   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3913   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3914   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3915   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3916   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3917   for (i=0; i<m; i++) {
3918     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3919     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3920     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3921   }
3922   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3923   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924 
3925   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3926   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3927   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3928   sprintf(name,"%s.%d",outfile,rank);
3929   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3930   ierr = PetscFree(name);CHKERRQ(ierr);
3931   ierr = MatView(B,out);CHKERRQ(ierr);
3932   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3933   ierr = MatDestroy(&B);CHKERRQ(ierr);
3934   PetscFunctionReturn(0);
3935 }
3936 
3937 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3938 #undef __FUNCT__
3939 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3940 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3941 {
3942   PetscErrorCode      ierr;
3943   Mat_Merge_SeqsToMPI *merge;
3944   PetscContainer      container;
3945 
3946   PetscFunctionBegin;
3947   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3948   if (container) {
3949     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3950     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3951     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3952     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3953     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3954     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3955     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3956     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3957     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3958     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3959     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3960     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3961     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3962     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3963     ierr = PetscFree(merge);CHKERRQ(ierr);
3964     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3965   }
3966   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3967   PetscFunctionReturn(0);
3968 }
3969 
3970 #include <../src/mat/utils/freespace.h>
3971 #include <petscbt.h>
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3975 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3976 {
3977   PetscErrorCode      ierr;
3978   MPI_Comm            comm;
3979   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3980   PetscMPIInt         size,rank,taga,*len_s;
3981   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3982   PetscInt            proc,m;
3983   PetscInt            **buf_ri,**buf_rj;
3984   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3985   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3986   MPI_Request         *s_waits,*r_waits;
3987   MPI_Status          *status;
3988   MatScalar           *aa=a->a;
3989   MatScalar           **abuf_r,*ba_i;
3990   Mat_Merge_SeqsToMPI *merge;
3991   PetscContainer      container;
3992 
3993   PetscFunctionBegin;
3994   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3995   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3996 
3997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3998   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3999 
4000   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4001   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4002 
4003   bi     = merge->bi;
4004   bj     = merge->bj;
4005   buf_ri = merge->buf_ri;
4006   buf_rj = merge->buf_rj;
4007 
4008   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4009   owners = merge->rowmap->range;
4010   len_s  = merge->len_s;
4011 
4012   /* send and recv matrix values */
4013   /*-----------------------------*/
4014   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4015   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4016 
4017   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4018   for (proc=0,k=0; proc<size; proc++) {
4019     if (!len_s[proc]) continue;
4020     i    = owners[proc];
4021     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4022     k++;
4023   }
4024 
4025   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4026   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4027   ierr = PetscFree(status);CHKERRQ(ierr);
4028 
4029   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4030   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4031 
4032   /* insert mat values of mpimat */
4033   /*----------------------------*/
4034   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4035   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4036 
4037   for (k=0; k<merge->nrecv; k++) {
4038     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4039     nrows       = *(buf_ri_k[k]);
4040     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4041     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4042   }
4043 
4044   /* set values of ba */
4045   m = merge->rowmap->n;
4046   for (i=0; i<m; i++) {
4047     arow = owners[rank] + i;
4048     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4049     bnzi = bi[i+1] - bi[i];
4050     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4051 
4052     /* add local non-zero vals of this proc's seqmat into ba */
4053     anzi   = ai[arow+1] - ai[arow];
4054     aj     = a->j + ai[arow];
4055     aa     = a->a + ai[arow];
4056     nextaj = 0;
4057     for (j=0; nextaj<anzi; j++) {
4058       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4059         ba_i[j] += aa[nextaj++];
4060       }
4061     }
4062 
4063     /* add received vals into ba */
4064     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4065       /* i-th row */
4066       if (i == *nextrow[k]) {
4067         anzi   = *(nextai[k]+1) - *nextai[k];
4068         aj     = buf_rj[k] + *(nextai[k]);
4069         aa     = abuf_r[k] + *(nextai[k]);
4070         nextaj = 0;
4071         for (j=0; nextaj<anzi; j++) {
4072           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4073             ba_i[j] += aa[nextaj++];
4074           }
4075         }
4076         nextrow[k]++; nextai[k]++;
4077       }
4078     }
4079     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4080   }
4081   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4082   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4083 
4084   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4085   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4086   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4087   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4088   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4089   PetscFunctionReturn(0);
4090 }
4091 
4092 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4093 
4094 #undef __FUNCT__
4095 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4096 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4097 {
4098   PetscErrorCode      ierr;
4099   Mat                 B_mpi;
4100   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4101   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4102   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4103   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4104   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4105   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4106   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4107   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4108   MPI_Status          *status;
4109   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4110   PetscBT             lnkbt;
4111   Mat_Merge_SeqsToMPI *merge;
4112   PetscContainer      container;
4113 
4114   PetscFunctionBegin;
4115   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4116 
4117   /* make sure it is a PETSc comm */
4118   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4119   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4120   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4121 
4122   ierr = PetscNew(&merge);CHKERRQ(ierr);
4123   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4124 
4125   /* determine row ownership */
4126   /*---------------------------------------------------------*/
4127   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4128   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4129   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4130   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4131   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4132   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4133   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4134 
4135   m      = merge->rowmap->n;
4136   owners = merge->rowmap->range;
4137 
4138   /* determine the number of messages to send, their lengths */
4139   /*---------------------------------------------------------*/
4140   len_s = merge->len_s;
4141 
4142   len          = 0; /* length of buf_si[] */
4143   merge->nsend = 0;
4144   for (proc=0; proc<size; proc++) {
4145     len_si[proc] = 0;
4146     if (proc == rank) {
4147       len_s[proc] = 0;
4148     } else {
4149       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4150       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4151     }
4152     if (len_s[proc]) {
4153       merge->nsend++;
4154       nrows = 0;
4155       for (i=owners[proc]; i<owners[proc+1]; i++) {
4156         if (ai[i+1] > ai[i]) nrows++;
4157       }
4158       len_si[proc] = 2*(nrows+1);
4159       len         += len_si[proc];
4160     }
4161   }
4162 
4163   /* determine the number and length of messages to receive for ij-structure */
4164   /*-------------------------------------------------------------------------*/
4165   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4166   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4167 
4168   /* post the Irecv of j-structure */
4169   /*-------------------------------*/
4170   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4171   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4172 
4173   /* post the Isend of j-structure */
4174   /*--------------------------------*/
4175   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4176 
4177   for (proc=0, k=0; proc<size; proc++) {
4178     if (!len_s[proc]) continue;
4179     i    = owners[proc];
4180     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4181     k++;
4182   }
4183 
4184   /* receives and sends of j-structure are complete */
4185   /*------------------------------------------------*/
4186   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4187   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4188 
4189   /* send and recv i-structure */
4190   /*---------------------------*/
4191   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4192   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4193 
4194   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4195   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4196   for (proc=0,k=0; proc<size; proc++) {
4197     if (!len_s[proc]) continue;
4198     /* form outgoing message for i-structure:
4199          buf_si[0]:                 nrows to be sent
4200                [1:nrows]:           row index (global)
4201                [nrows+1:2*nrows+1]: i-structure index
4202     */
4203     /*-------------------------------------------*/
4204     nrows       = len_si[proc]/2 - 1;
4205     buf_si_i    = buf_si + nrows+1;
4206     buf_si[0]   = nrows;
4207     buf_si_i[0] = 0;
4208     nrows       = 0;
4209     for (i=owners[proc]; i<owners[proc+1]; i++) {
4210       anzi = ai[i+1] - ai[i];
4211       if (anzi) {
4212         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4213         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4214         nrows++;
4215       }
4216     }
4217     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4218     k++;
4219     buf_si += len_si[proc];
4220   }
4221 
4222   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4223   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4224 
4225   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4226   for (i=0; i<merge->nrecv; i++) {
4227     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4228   }
4229 
4230   ierr = PetscFree(len_si);CHKERRQ(ierr);
4231   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4232   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4233   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4234   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4235   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4236   ierr = PetscFree(status);CHKERRQ(ierr);
4237 
4238   /* compute a local seq matrix in each processor */
4239   /*----------------------------------------------*/
4240   /* allocate bi array and free space for accumulating nonzero column info */
4241   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4242   bi[0] = 0;
4243 
4244   /* create and initialize a linked list */
4245   nlnk = N+1;
4246   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4247 
4248   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4249   len  = ai[owners[rank+1]] - ai[owners[rank]];
4250   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4251 
4252   current_space = free_space;
4253 
4254   /* determine symbolic info for each local row */
4255   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4256 
4257   for (k=0; k<merge->nrecv; k++) {
4258     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4259     nrows       = *buf_ri_k[k];
4260     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4261     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4262   }
4263 
4264   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4265   len  = 0;
4266   for (i=0; i<m; i++) {
4267     bnzi = 0;
4268     /* add local non-zero cols of this proc's seqmat into lnk */
4269     arow  = owners[rank] + i;
4270     anzi  = ai[arow+1] - ai[arow];
4271     aj    = a->j + ai[arow];
4272     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4273     bnzi += nlnk;
4274     /* add received col data into lnk */
4275     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4276       if (i == *nextrow[k]) { /* i-th row */
4277         anzi  = *(nextai[k]+1) - *nextai[k];
4278         aj    = buf_rj[k] + *nextai[k];
4279         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4280         bnzi += nlnk;
4281         nextrow[k]++; nextai[k]++;
4282       }
4283     }
4284     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4285 
4286     /* if free space is not available, make more free space */
4287     if (current_space->local_remaining<bnzi) {
4288       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4289       nspacedouble++;
4290     }
4291     /* copy data into free space, then initialize lnk */
4292     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4293     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4294 
4295     current_space->array           += bnzi;
4296     current_space->local_used      += bnzi;
4297     current_space->local_remaining -= bnzi;
4298 
4299     bi[i+1] = bi[i] + bnzi;
4300   }
4301 
4302   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4303 
4304   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4305   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4306   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4307 
4308   /* create symbolic parallel matrix B_mpi */
4309   /*---------------------------------------*/
4310   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4311   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4312   if (n==PETSC_DECIDE) {
4313     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4314   } else {
4315     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4316   }
4317   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4318   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4319   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4320   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4321   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4322 
4323   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4324   B_mpi->assembled    = PETSC_FALSE;
4325   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4326   merge->bi           = bi;
4327   merge->bj           = bj;
4328   merge->buf_ri       = buf_ri;
4329   merge->buf_rj       = buf_rj;
4330   merge->coi          = NULL;
4331   merge->coj          = NULL;
4332   merge->owners_co    = NULL;
4333 
4334   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4335 
4336   /* attach the supporting struct to B_mpi for reuse */
4337   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4338   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4339   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4340   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4341   *mpimat = B_mpi;
4342 
4343   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4349 /*@C
4350       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4351                  matrices from each processor
4352 
4353     Collective on MPI_Comm
4354 
4355    Input Parameters:
4356 +    comm - the communicators the parallel matrix will live on
4357 .    seqmat - the input sequential matrices
4358 .    m - number of local rows (or PETSC_DECIDE)
4359 .    n - number of local columns (or PETSC_DECIDE)
4360 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4361 
4362    Output Parameter:
4363 .    mpimat - the parallel matrix generated
4364 
4365     Level: advanced
4366 
4367    Notes:
4368      The dimensions of the sequential matrix in each processor MUST be the same.
4369      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4370      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4371 @*/
4372 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4373 {
4374   PetscErrorCode ierr;
4375   PetscMPIInt    size;
4376 
4377   PetscFunctionBegin;
4378   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4379   if (size == 1) {
4380     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4381     if (scall == MAT_INITIAL_MATRIX) {
4382       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4383     } else {
4384       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4385     }
4386     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4387     PetscFunctionReturn(0);
4388   }
4389   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4390   if (scall == MAT_INITIAL_MATRIX) {
4391     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4392   }
4393   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4394   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4400 /*@
4401      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4402           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4403           with MatGetSize()
4404 
4405     Not Collective
4406 
4407    Input Parameters:
4408 +    A - the matrix
4409 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4410 
4411    Output Parameter:
4412 .    A_loc - the local sequential matrix generated
4413 
4414     Level: developer
4415 
4416 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4417 
4418 @*/
4419 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4420 {
4421   PetscErrorCode ierr;
4422   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4423   Mat_SeqAIJ     *mat,*a,*b;
4424   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4425   MatScalar      *aa,*ba,*cam;
4426   PetscScalar    *ca;
4427   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4428   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4429   PetscBool      match;
4430   MPI_Comm       comm;
4431   PetscMPIInt    size;
4432 
4433   PetscFunctionBegin;
4434   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4435   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4436   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4437   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4438   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4439 
4440   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4441   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4442   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4443   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4444   aa = a->a; ba = b->a;
4445   if (scall == MAT_INITIAL_MATRIX) {
4446     if (size == 1) {
4447       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4448       PetscFunctionReturn(0);
4449     }
4450 
4451     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4452     ci[0] = 0;
4453     for (i=0; i<am; i++) {
4454       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4455     }
4456     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4457     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4458     k    = 0;
4459     for (i=0; i<am; i++) {
4460       ncols_o = bi[i+1] - bi[i];
4461       ncols_d = ai[i+1] - ai[i];
4462       /* off-diagonal portion of A */
4463       for (jo=0; jo<ncols_o; jo++) {
4464         col = cmap[*bj];
4465         if (col >= cstart) break;
4466         cj[k]   = col; bj++;
4467         ca[k++] = *ba++;
4468       }
4469       /* diagonal portion of A */
4470       for (j=0; j<ncols_d; j++) {
4471         cj[k]   = cstart + *aj++;
4472         ca[k++] = *aa++;
4473       }
4474       /* off-diagonal portion of A */
4475       for (j=jo; j<ncols_o; j++) {
4476         cj[k]   = cmap[*bj++];
4477         ca[k++] = *ba++;
4478       }
4479     }
4480     /* put together the new matrix */
4481     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4482     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4483     /* Since these are PETSc arrays, change flags to free them as necessary. */
4484     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4485     mat->free_a  = PETSC_TRUE;
4486     mat->free_ij = PETSC_TRUE;
4487     mat->nonew   = 0;
4488   } else if (scall == MAT_REUSE_MATRIX) {
4489     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4490     ci = mat->i; cj = mat->j; cam = mat->a;
4491     for (i=0; i<am; i++) {
4492       /* off-diagonal portion of A */
4493       ncols_o = bi[i+1] - bi[i];
4494       for (jo=0; jo<ncols_o; jo++) {
4495         col = cmap[*bj];
4496         if (col >= cstart) break;
4497         *cam++ = *ba++; bj++;
4498       }
4499       /* diagonal portion of A */
4500       ncols_d = ai[i+1] - ai[i];
4501       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4502       /* off-diagonal portion of A */
4503       for (j=jo; j<ncols_o; j++) {
4504         *cam++ = *ba++; bj++;
4505       }
4506     }
4507   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4508   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4514 /*@C
4515      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4516 
4517     Not Collective
4518 
4519    Input Parameters:
4520 +    A - the matrix
4521 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4522 -    row, col - index sets of rows and columns to extract (or NULL)
4523 
4524    Output Parameter:
4525 .    A_loc - the local sequential matrix generated
4526 
4527     Level: developer
4528 
4529 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4530 
4531 @*/
4532 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4533 {
4534   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4535   PetscErrorCode ierr;
4536   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4537   IS             isrowa,iscola;
4538   Mat            *aloc;
4539   PetscBool      match;
4540 
4541   PetscFunctionBegin;
4542   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4543   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4544   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4545   if (!row) {
4546     start = A->rmap->rstart; end = A->rmap->rend;
4547     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4548   } else {
4549     isrowa = *row;
4550   }
4551   if (!col) {
4552     start = A->cmap->rstart;
4553     cmap  = a->garray;
4554     nzA   = a->A->cmap->n;
4555     nzB   = a->B->cmap->n;
4556     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4557     ncols = 0;
4558     for (i=0; i<nzB; i++) {
4559       if (cmap[i] < start) idx[ncols++] = cmap[i];
4560       else break;
4561     }
4562     imark = i;
4563     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4564     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4565     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4566   } else {
4567     iscola = *col;
4568   }
4569   if (scall != MAT_INITIAL_MATRIX) {
4570     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4571     aloc[0] = *A_loc;
4572   }
4573   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4574   *A_loc = aloc[0];
4575   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4576   if (!row) {
4577     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4578   }
4579   if (!col) {
4580     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4581   }
4582   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4583   PetscFunctionReturn(0);
4584 }
4585 
4586 #undef __FUNCT__
4587 #define __FUNCT__ "MatGetBrowsOfAcols"
4588 /*@C
4589     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4590 
4591     Collective on Mat
4592 
4593    Input Parameters:
4594 +    A,B - the matrices in mpiaij format
4595 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4596 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4597 
4598    Output Parameter:
4599 +    rowb, colb - index sets of rows and columns of B to extract
4600 -    B_seq - the sequential matrix generated
4601 
4602     Level: developer
4603 
4604 @*/
4605 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4606 {
4607   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4608   PetscErrorCode ierr;
4609   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4610   IS             isrowb,iscolb;
4611   Mat            *bseq=NULL;
4612 
4613   PetscFunctionBegin;
4614   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4615     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4616   }
4617   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4618 
4619   if (scall == MAT_INITIAL_MATRIX) {
4620     start = A->cmap->rstart;
4621     cmap  = a->garray;
4622     nzA   = a->A->cmap->n;
4623     nzB   = a->B->cmap->n;
4624     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4625     ncols = 0;
4626     for (i=0; i<nzB; i++) {  /* row < local row index */
4627       if (cmap[i] < start) idx[ncols++] = cmap[i];
4628       else break;
4629     }
4630     imark = i;
4631     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4632     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4633     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4634     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4635   } else {
4636     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4637     isrowb  = *rowb; iscolb = *colb;
4638     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4639     bseq[0] = *B_seq;
4640   }
4641   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4642   *B_seq = bseq[0];
4643   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4644   if (!rowb) {
4645     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4646   } else {
4647     *rowb = isrowb;
4648   }
4649   if (!colb) {
4650     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4651   } else {
4652     *colb = iscolb;
4653   }
4654   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4655   PetscFunctionReturn(0);
4656 }
4657 
4658 #undef __FUNCT__
4659 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4660 /*
4661     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4662     of the OFF-DIAGONAL portion of local A
4663 
4664     Collective on Mat
4665 
4666    Input Parameters:
4667 +    A,B - the matrices in mpiaij format
4668 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4669 
4670    Output Parameter:
4671 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4672 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4673 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4674 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4675 
4676     Level: developer
4677 
4678 */
4679 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4680 {
4681   VecScatter_MPI_General *gen_to,*gen_from;
4682   PetscErrorCode         ierr;
4683   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4684   Mat_SeqAIJ             *b_oth;
4685   VecScatter             ctx =a->Mvctx;
4686   MPI_Comm               comm;
4687   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4688   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4689   PetscScalar            *rvalues,*svalues;
4690   MatScalar              *b_otha,*bufa,*bufA;
4691   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4692   MPI_Request            *rwaits = NULL,*swaits = NULL;
4693   MPI_Status             *sstatus,rstatus;
4694   PetscMPIInt            jj,size;
4695   PetscInt               *cols,sbs,rbs;
4696   PetscScalar            *vals;
4697 
4698   PetscFunctionBegin;
4699   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4700   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4701 
4702   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4703     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4704   }
4705   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4706   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4707 
4708   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4709   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4710   rvalues  = gen_from->values; /* holds the length of receiving row */
4711   svalues  = gen_to->values;   /* holds the length of sending row */
4712   nrecvs   = gen_from->n;
4713   nsends   = gen_to->n;
4714 
4715   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4716   srow    = gen_to->indices;    /* local row index to be sent */
4717   sstarts = gen_to->starts;
4718   sprocs  = gen_to->procs;
4719   sstatus = gen_to->sstatus;
4720   sbs     = gen_to->bs;
4721   rstarts = gen_from->starts;
4722   rprocs  = gen_from->procs;
4723   rbs     = gen_from->bs;
4724 
4725   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4726   if (scall == MAT_INITIAL_MATRIX) {
4727     /* i-array */
4728     /*---------*/
4729     /*  post receives */
4730     for (i=0; i<nrecvs; i++) {
4731       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4732       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4733       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4734     }
4735 
4736     /* pack the outgoing message */
4737     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4738 
4739     sstartsj[0] = 0;
4740     rstartsj[0] = 0;
4741     len         = 0; /* total length of j or a array to be sent */
4742     k           = 0;
4743     for (i=0; i<nsends; i++) {
4744       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4745       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4746       for (j=0; j<nrows; j++) {
4747         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4748         for (l=0; l<sbs; l++) {
4749           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4750 
4751           rowlen[j*sbs+l] = ncols;
4752 
4753           len += ncols;
4754           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4755         }
4756         k++;
4757       }
4758       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4759 
4760       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4761     }
4762     /* recvs and sends of i-array are completed */
4763     i = nrecvs;
4764     while (i--) {
4765       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4766     }
4767     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4768 
4769     /* allocate buffers for sending j and a arrays */
4770     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4771     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4772 
4773     /* create i-array of B_oth */
4774     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4775 
4776     b_othi[0] = 0;
4777     len       = 0; /* total length of j or a array to be received */
4778     k         = 0;
4779     for (i=0; i<nrecvs; i++) {
4780       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4781       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4782       for (j=0; j<nrows; j++) {
4783         b_othi[k+1] = b_othi[k] + rowlen[j];
4784         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4785         k++;
4786       }
4787       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4788     }
4789 
4790     /* allocate space for j and a arrrays of B_oth */
4791     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4792     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4793 
4794     /* j-array */
4795     /*---------*/
4796     /*  post receives of j-array */
4797     for (i=0; i<nrecvs; i++) {
4798       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4799       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4800     }
4801 
4802     /* pack the outgoing message j-array */
4803     k = 0;
4804     for (i=0; i<nsends; i++) {
4805       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4806       bufJ  = bufj+sstartsj[i];
4807       for (j=0; j<nrows; j++) {
4808         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4809         for (ll=0; ll<sbs; ll++) {
4810           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4811           for (l=0; l<ncols; l++) {
4812             *bufJ++ = cols[l];
4813           }
4814           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4815         }
4816       }
4817       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4818     }
4819 
4820     /* recvs and sends of j-array are completed */
4821     i = nrecvs;
4822     while (i--) {
4823       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4824     }
4825     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4826   } else if (scall == MAT_REUSE_MATRIX) {
4827     sstartsj = *startsj_s;
4828     rstartsj = *startsj_r;
4829     bufa     = *bufa_ptr;
4830     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4831     b_otha   = b_oth->a;
4832   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4833 
4834   /* a-array */
4835   /*---------*/
4836   /*  post receives of a-array */
4837   for (i=0; i<nrecvs; i++) {
4838     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4839     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4840   }
4841 
4842   /* pack the outgoing message a-array */
4843   k = 0;
4844   for (i=0; i<nsends; i++) {
4845     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4846     bufA  = bufa+sstartsj[i];
4847     for (j=0; j<nrows; j++) {
4848       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4849       for (ll=0; ll<sbs; ll++) {
4850         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4851         for (l=0; l<ncols; l++) {
4852           *bufA++ = vals[l];
4853         }
4854         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4855       }
4856     }
4857     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4858   }
4859   /* recvs and sends of a-array are completed */
4860   i = nrecvs;
4861   while (i--) {
4862     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4863   }
4864   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4865   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4866 
4867   if (scall == MAT_INITIAL_MATRIX) {
4868     /* put together the new matrix */
4869     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4870 
4871     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4872     /* Since these are PETSc arrays, change flags to free them as necessary. */
4873     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4874     b_oth->free_a  = PETSC_TRUE;
4875     b_oth->free_ij = PETSC_TRUE;
4876     b_oth->nonew   = 0;
4877 
4878     ierr = PetscFree(bufj);CHKERRQ(ierr);
4879     if (!startsj_s || !bufa_ptr) {
4880       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4881       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4882     } else {
4883       *startsj_s = sstartsj;
4884       *startsj_r = rstartsj;
4885       *bufa_ptr  = bufa;
4886     }
4887   }
4888   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
4891 
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatGetCommunicationStructs"
4894 /*@C
4895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4896 
4897   Not Collective
4898 
4899   Input Parameters:
4900 . A - The matrix in mpiaij format
4901 
4902   Output Parameter:
4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4904 . colmap - A map from global column index to local index into lvec
4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4906 
4907   Level: developer
4908 
4909 @*/
4910 #if defined(PETSC_USE_CTABLE)
4911 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4912 #else
4913 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4914 #endif
4915 {
4916   Mat_MPIAIJ *a;
4917 
4918   PetscFunctionBegin;
4919   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4920   PetscValidPointer(lvec, 2);
4921   PetscValidPointer(colmap, 3);
4922   PetscValidPointer(multScatter, 4);
4923   a = (Mat_MPIAIJ*) A->data;
4924   if (lvec) *lvec = a->lvec;
4925   if (colmap) *colmap = a->colmap;
4926   if (multScatter) *multScatter = a->Mvctx;
4927   PetscFunctionReturn(0);
4928 }
4929 
4930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4933 #if defined(PETSC_HAVE_ELEMENTAL)
4934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4935 #endif
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4939 /*
4940     Computes (B'*A')' since computing B*A directly is untenable
4941 
4942                n                       p                          p
4943         (              )       (              )         (                  )
4944       m (      A       )  *  n (       B      )   =   m (         C        )
4945         (              )       (              )         (                  )
4946 
4947 */
4948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4949 {
4950   PetscErrorCode ierr;
4951   Mat            At,Bt,Ct;
4952 
4953   PetscFunctionBegin;
4954   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4955   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4956   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4957   ierr = MatDestroy(&At);CHKERRQ(ierr);
4958   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4959   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4960   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4961   PetscFunctionReturn(0);
4962 }
4963 
4964 #undef __FUNCT__
4965 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4966 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4967 {
4968   PetscErrorCode ierr;
4969   PetscInt       m=A->rmap->n,n=B->cmap->n;
4970   Mat            Cmat;
4971 
4972   PetscFunctionBegin;
4973   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4974   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4975   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4976   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4977   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4978   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4979   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4980   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4981 
4982   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4983 
4984   *C = Cmat;
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 /* ----------------------------------------------------------------*/
4989 #undef __FUNCT__
4990 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4991 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4992 {
4993   PetscErrorCode ierr;
4994 
4995   PetscFunctionBegin;
4996   if (scall == MAT_INITIAL_MATRIX) {
4997     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4998     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4999     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5000   }
5001   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5002   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5003   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 /*MC
5008    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5009 
5010    Options Database Keys:
5011 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5012 
5013   Level: beginner
5014 
5015 .seealso: MatCreateAIJ()
5016 M*/
5017 
5018 #undef __FUNCT__
5019 #define __FUNCT__ "MatCreate_MPIAIJ"
5020 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5021 {
5022   Mat_MPIAIJ     *b;
5023   PetscErrorCode ierr;
5024   PetscMPIInt    size;
5025 
5026   PetscFunctionBegin;
5027   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5028 
5029   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5030   B->data       = (void*)b;
5031   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5032   B->assembled  = PETSC_FALSE;
5033   B->insertmode = NOT_SET_VALUES;
5034   b->size       = size;
5035 
5036   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5037 
5038   /* build cache for off array entries formed */
5039   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5040 
5041   b->donotstash  = PETSC_FALSE;
5042   b->colmap      = 0;
5043   b->garray      = 0;
5044   b->roworiented = PETSC_TRUE;
5045 
5046   /* stuff used for matrix vector multiply */
5047   b->lvec  = NULL;
5048   b->Mvctx = NULL;
5049 
5050   /* stuff for MatGetRow() */
5051   b->rowindices   = 0;
5052   b->rowvalues    = 0;
5053   b->getrowactive = PETSC_FALSE;
5054 
5055   /* flexible pointer used in CUSP/CUSPARSE classes */
5056   b->spptr = NULL;
5057 
5058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5069 #if defined(PETSC_HAVE_ELEMENTAL)
5070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5071 #endif
5072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5075   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5076   PetscFunctionReturn(0);
5077 }
5078 
5079 #undef __FUNCT__
5080 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5081 /*@C
5082      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5083          and "off-diagonal" part of the matrix in CSR format.
5084 
5085    Collective on MPI_Comm
5086 
5087    Input Parameters:
5088 +  comm - MPI communicator
5089 .  m - number of local rows (Cannot be PETSC_DECIDE)
5090 .  n - This value should be the same as the local size used in creating the
5091        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5092        calculated if N is given) For square matrices n is almost always m.
5093 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5094 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5095 .   i - row indices for "diagonal" portion of matrix
5096 .   j - column indices
5097 .   a - matrix values
5098 .   oi - row indices for "off-diagonal" portion of matrix
5099 .   oj - column indices
5100 -   oa - matrix values
5101 
5102    Output Parameter:
5103 .   mat - the matrix
5104 
5105    Level: advanced
5106 
5107    Notes:
5108        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5109        must free the arrays once the matrix has been destroyed and not before.
5110 
5111        The i and j indices are 0 based
5112 
5113        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5114 
5115        This sets local rows and cannot be used to set off-processor values.
5116 
5117        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5118        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5119        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5120        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5121        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5122        communication if it is known that only local entries will be set.
5123 
5124 .keywords: matrix, aij, compressed row, sparse, parallel
5125 
5126 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5127           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5128 @*/
5129 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5130 {
5131   PetscErrorCode ierr;
5132   Mat_MPIAIJ     *maij;
5133 
5134   PetscFunctionBegin;
5135   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5136   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5137   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5138   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5139   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5140   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5141   maij = (Mat_MPIAIJ*) (*mat)->data;
5142 
5143   (*mat)->preallocated = PETSC_TRUE;
5144 
5145   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5146   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5147 
5148   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5149   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5150 
5151   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5152   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5153   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5154   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5155 
5156   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5157   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5158   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5159   PetscFunctionReturn(0);
5160 }
5161 
5162 /*
5163     Special version for direct calls from Fortran
5164 */
5165 #include <petsc/private/fortranimpl.h>
5166 
/* Select the external symbol name expected by the Fortran compiler's name-mangling scheme */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/*
   The Fortran stub below returns void, so the usual error macros (which return an
   error code) cannot be used as they are.  From here to the end of the file they are
   redirected to CHKERRABORT(); the message and format arguments of the SETERRQ*()
   variants are discarded by these definitions.
*/
#undef CHKERRQ
#define CHKERRQ(errcode) CHKERRABORT(PETSC_COMM_WORLD,errcode)
#undef SETERRQ
#define SETERRQ(comm,errcode,msg) CHKERRABORT(comm,errcode)
#undef SETERRQ2
#define SETERRQ2(comm,errcode,msg,arg1,arg2) CHKERRABORT(comm,errcode)
#undef SETERRQ3
#define SETERRQ3(comm,errcode,msg,arg1,arg2,arg3) CHKERRABORT(comm,errcode)
5182 
5183 #undef __FUNCT__
5184 #define __FUNCT__ "matsetvaluesmpiaij_"
5185 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5186 {
5187   Mat            mat  = *mmat;
5188   PetscInt       m    = *mm, n = *mn;
5189   InsertMode     addv = *maddv;
5190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5191   PetscScalar    value;
5192   PetscErrorCode ierr;
5193 
5194   MatCheckPreallocated(mat,1);
5195   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5196 
5197 #if defined(PETSC_USE_DEBUG)
5198   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5199 #endif
5200   {
5201     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5202     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5203     PetscBool roworiented = aij->roworiented;
5204 
5205     /* Some Variables required in the macro */
5206     Mat        A                 = aij->A;
5207     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5208     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5209     MatScalar  *aa               = a->a;
5210     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5211     Mat        B                 = aij->B;
5212     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5213     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5214     MatScalar  *ba               = b->a;
5215 
5216     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5217     PetscInt  nonew = a->nonew;
5218     MatScalar *ap1,*ap2;
5219 
5220     PetscFunctionBegin;
5221     for (i=0; i<m; i++) {
5222       if (im[i] < 0) continue;
5223 #if defined(PETSC_USE_DEBUG)
5224       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5225 #endif
5226       if (im[i] >= rstart && im[i] < rend) {
5227         row      = im[i] - rstart;
5228         lastcol1 = -1;
5229         rp1      = aj + ai[row];
5230         ap1      = aa + ai[row];
5231         rmax1    = aimax[row];
5232         nrow1    = ailen[row];
5233         low1     = 0;
5234         high1    = nrow1;
5235         lastcol2 = -1;
5236         rp2      = bj + bi[row];
5237         ap2      = ba + bi[row];
5238         rmax2    = bimax[row];
5239         nrow2    = bilen[row];
5240         low2     = 0;
5241         high2    = nrow2;
5242 
5243         for (j=0; j<n; j++) {
5244           if (roworiented) value = v[i*n+j];
5245           else value = v[i+j*m];
5246           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5247           if (in[j] >= cstart && in[j] < cend) {
5248             col = in[j] - cstart;
5249             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5250           } else if (in[j] < 0) continue;
5251 #if defined(PETSC_USE_DEBUG)
5252           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5253 #endif
5254           else {
5255             if (mat->was_assembled) {
5256               if (!aij->colmap) {
5257                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5258               }
5259 #if defined(PETSC_USE_CTABLE)
5260               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5261               col--;
5262 #else
5263               col = aij->colmap[in[j]] - 1;
5264 #endif
5265               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5266                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5267                 col  =  in[j];
5268                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5269                 B     = aij->B;
5270                 b     = (Mat_SeqAIJ*)B->data;
5271                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5272                 rp2   = bj + bi[row];
5273                 ap2   = ba + bi[row];
5274                 rmax2 = bimax[row];
5275                 nrow2 = bilen[row];
5276                 low2  = 0;
5277                 high2 = nrow2;
5278                 bm    = aij->B->rmap->n;
5279                 ba    = b->a;
5280               }
5281             } else col = in[j];
5282             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5283           }
5284         }
5285       } else if (!aij->donotstash) {
5286         if (roworiented) {
5287           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5288         } else {
5289           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5290         }
5291       }
5292     }
5293   }
5294   PetscFunctionReturnVoid();
5295 }
5296 
5297