xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f58dd5f4cb6ba73c65eb3aa3bfd0a46c254751dc)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
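
   A minimal creation sketch (the global sizes M and N and the preallocation counts 5 and 2 are purely illustrative):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls; whichever preallocation
   routine does not match the communicator is simply ignored.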
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
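
    A minimal call sketch (m is the number of locally owned rows chosen by the caller on each process;
    gmat is the sequential matrix whose entries are read on process 0; dist is an illustrative name):

       Mat dist;
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);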
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the numbers of diagonal and off-diagonal entries */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the numbers of diagonal and off-diagonal entries */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each processor
402 has an order N integer array but is fast to access).
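
  Lookup sketch (the pattern used by MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; gcol and
  lcol are illustrative names for a global column index and the corresponding local off-diagonal index):

#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
    lcol--;
#else
    lcol = aij->colmap[gcol] - 1;
#endif

  A result of lcol == -1 means that global column gcol does not occur in the off-diagonal part.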
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
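  /* v is assumed to hold the entire row in ascending global column order: first the off-diagonal
     entries whose global columns lie to the left of the diagonal block, then the diagonal-block
     entries, then the remaining off-diagonal entries; the PetscMemcpy() calls below copy these
     three pieces into b, a, and b respectively */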
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if the nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled, thus we can skip this step
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the VecScatterEnd() on the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1154 #if defined(PETSC_HAVE_ELEMENTAL)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1156 #endif
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1162 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1163 {
1164   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1165   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1166   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1167   PetscErrorCode ierr;
1168   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1169   int            fd;
1170   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1171   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1172   PetscScalar    *column_values;
1173   PetscInt       message_count,flowcontrolcount;
1174   FILE           *file;
1175 
1176   PetscFunctionBegin;
1177   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1178   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1179   nz   = A->nz + B->nz;
1180   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1181   if (!rank) {
1182     header[0] = MAT_FILE_CLASSID;
1183     header[1] = mat->rmap->N;
1184     header[2] = mat->cmap->N;
1185 
1186     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1187     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     /* get largest number of rows any processor has */
1189     rlen  = mat->rmap->n;
1190     range = mat->rmap->range;
1191     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1192   } else {
1193     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1194     rlen = mat->rmap->n;
1195   }
1196 
1197   /* load up the local row counts */
1198   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1199   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1200 
1201   /* store the row lengths to the file */
1202   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1203   if (!rank) {
1204     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       rlen = range[i+1] - range[i];
1208       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1209       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1210     }
1211     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1212   } else {
1213     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1214     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1215     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1216   }
1217   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1218 
1219   /* load up the local column indices */
1220   nzmax = nz; /* process 0 needs enough space to hold what the largest processor needs */
1221   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1223   cnt   = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if ((col = garray[B->j[j]]) > cstart) break;
1227       column_indices[cnt++] = col;
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1230     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column indices to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1243       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1254 
1255   /* load up the local column values */
1256   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1257   cnt  = 0;
1258   for (i=0; i<mat->rmap->n; i++) {
1259     for (j=B->i[i]; j<B->i[i+1]; j++) {
1260       if (garray[B->j[j]] > cstart) break;
1261       column_values[cnt++] = B->a[j];
1262     }
1263     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1264     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1265   }
1266   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1267 
1268   /* store the column values to the file */
1269   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1270   if (!rank) {
1271     MPI_Status status;
1272     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1273     for (i=1; i<size; i++) {
1274       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1275       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1276       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1277       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1279     }
1280     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1281   } else {
1282     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1283     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1284     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1286   }
1287   ierr = PetscFree(column_values);CHKERRQ(ierr);
1288 
1289   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1290   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1291   PetscFunctionReturn(0);
1292 }
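/*
   Illustrative caller-side sketch (not part of the implementation above): the binary stream
   written by this routine is what MatLoad() expects to read back.  Assuming 'A' is an
   assembled MATMPIAIJ matrix:

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/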
1293 
1294 #include <petscdraw.h>
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1297 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1298 {
1299   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1300   PetscErrorCode    ierr;
1301   PetscMPIInt       rank = aij->rank,size = aij->size;
1302   PetscBool         isdraw,iascii,isbinary;
1303   PetscViewer       sviewer;
1304   PetscViewerFormat format;
1305 
1306   PetscFunctionBegin;
1307   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1308   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1309   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1310   if (iascii) {
1311     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1312     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1313       MatInfo   info;
1314       PetscBool inodes;
1315 
1316       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1317       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1318       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1320       if (!inodes) {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1323       } else {
1324         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1325                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1326       }
1327       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1329       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1331       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1332       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1334       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1335       PetscFunctionReturn(0);
1336     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1337       PetscInt inodecount,inodelimit,*inodes;
1338       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1339       if (inodes) {
1340         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1341       } else {
1342         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1343       }
1344       PetscFunctionReturn(0);
1345     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1346       PetscFunctionReturn(0);
1347     }
1348   } else if (isbinary) {
1349     if (size == 1) {
1350       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1351       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1352     } else {
1353       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1354     }
1355     PetscFunctionReturn(0);
1356   } else if (isdraw) {
1357     PetscDraw draw;
1358     PetscBool isnull;
1359     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1360     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1361     if (isnull) PetscFunctionReturn(0);
1362   }
1363 
1364   {
1365     /* assemble the entire matrix onto the first processor */
1366     Mat        A;
1367     Mat_SeqAIJ *Aloc;
1368     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1369     MatScalar  *a;
1370 
1371     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1372     if (!rank) {
1373       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1374     } else {
1375       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1376     }
1377     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1378     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1379     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1380     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1381     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1382 
1383     /* copy over the A part */
1384     Aloc = (Mat_SeqAIJ*)aij->A->data;
1385     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1388     for (i=0; i<m; i++) {
1389       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1390       row++;
1391       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1392     }
1393     aj = Aloc->j;
1394     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1395 
1396     /* copy over the B part */
1397     Aloc = (Mat_SeqAIJ*)aij->B->data;
1398     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1399     row  = mat->rmap->rstart;
1400     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1401     ct   = cols;
1402     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1403     for (i=0; i<m; i++) {
1404       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1405       row++;
1406       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1407     }
1408     ierr = PetscFree(ct);CHKERRQ(ierr);
1409     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     /*
1412        Every process has to participate in this call that draws the matrix, since the
1413        graphics waits are synchronized across all processes that share the PetscDraw object
1414     */
1415     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1416     if (!rank) {
1417       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1418       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1419     }
1420     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1421     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1422     ierr = MatDestroy(&A);CHKERRQ(ierr);
1423   }
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatView_MPIAIJ"
1429 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1430 {
1431   PetscErrorCode ierr;
1432   PetscBool      iascii,isdraw,issocket,isbinary;
1433 
1434   PetscFunctionBegin;
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1438   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1439   if (iascii || isdraw || isbinary || issocket) {
1440     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatSOR_MPIAIJ"
1447 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1448 {
1449   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1450   PetscErrorCode ierr;
1451   Vec            bb1 = 0;
1452   PetscBool      hasop;
1453 
1454   PetscFunctionBegin;
1455   if (flag == SOR_APPLY_UPPER) {
1456     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1457     PetscFunctionReturn(0);
1458   }
1459 
1460   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1461     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1462   }
1463 
1464   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1465     if (flag & SOR_ZERO_INITIAL_GUESS) {
1466       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1467       its--;
1468     }
1469 
1470     while (its--) {
1471       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473 
1474       /* update rhs: bb1 = bb - B*x */
1475       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1476       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1477 
1478       /* local sweep */
1479       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1480     }
1481   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1482     if (flag & SOR_ZERO_INITIAL_GUESS) {
1483       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1484       its--;
1485     }
1486     while (its--) {
1487       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1488       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489 
1490       /* update rhs: bb1 = bb - B*x */
1491       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1492       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1493 
1494       /* local sweep */
1495       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1496     }
1497   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1498     if (flag & SOR_ZERO_INITIAL_GUESS) {
1499       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1500       its--;
1501     }
1502     while (its--) {
1503       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505 
1506       /* update rhs: bb1 = bb - B*x */
1507       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1508       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1509 
1510       /* local sweep */
1511       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1512     }
1513   } else if (flag & SOR_EISENSTAT) {
1514     Vec xx1;
1515 
1516     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1517     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1518 
1519     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521     if (!mat->diag) {
1522       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1523       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1524     }
1525     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1526     if (hasop) {
1527       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1528     } else {
1529       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1530     }
1531     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1532 
1533     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1534 
1535     /* local sweep */
1536     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1537     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1538     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1539   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1540 
1541   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1542 
1543   matin->factorerrortype = mat->A->factorerrortype;
1544   PetscFunctionReturn(0);
1545 }
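/*
   Usage sketch (illustrative): the local sweeps above are normally reached through PCSOR,
   for example with a KSP 'ksp' already attached to this matrix

      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or from the options database with -pc_type sor -pc_sor_local_symmetric.
*/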
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatPermute_MPIAIJ"
1549 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1550 {
1551   Mat            aA,aB,Aperm;
1552   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1553   PetscScalar    *aa,*ba;
1554   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1555   PetscSF        rowsf,sf;
1556   IS             parcolp = NULL;
1557   PetscBool      done;
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1562   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1563   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1564   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1565 
1566   /* Invert row permutation to find out where my rows should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1570   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1571   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1573 
1574   /* Invert column permutation to find out where my columns should go */
1575   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1576   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1577   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1578   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1579   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1580   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1581   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1582 
1583   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1584   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1585   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1586 
1587   /* Find out where my gcols should go */
1588   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1589   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1593   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1594   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1595   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1596 
1597   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1598   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1599   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1600   for (i=0; i<m; i++) {
1601     PetscInt row = rdest[i],rowner;
1602     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1603     for (j=ai[i]; j<ai[i+1]; j++) {
1604       PetscInt cowner,col = cdest[aj[j]];
1605       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1606       if (rowner == cowner) dnnz[i]++;
1607       else onnz[i]++;
1608     }
1609     for (j=bi[i]; j<bi[i+1]; j++) {
1610       PetscInt cowner,col = gcdest[bj[j]];
1611       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1612       if (rowner == cowner) dnnz[i]++;
1613       else onnz[i]++;
1614     }
1615   }
1616   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1617   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1618   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1619   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1620   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1621 
1622   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1623   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1624   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1625   for (i=0; i<m; i++) {
1626     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1627     PetscInt j0,rowlen;
1628     rowlen = ai[i+1] - ai[i];
1629     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches of at most m */
1630       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1631       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1632     }
1633     rowlen = bi[i+1] - bi[i];
1634     for (j0=j=0; j<rowlen; j0=j) {
1635       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1636       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1637     }
1638   }
1639   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1640   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1642   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1643   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1644   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1645   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1646   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1647   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1648   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1649   *B = Aperm;
1650   PetscFunctionReturn(0);
1651 }
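/*
   Caller-side sketch (illustrative): 'A' is an assembled MATMPIAIJ matrix and 'rowp','colp'
   are index sets giving the desired new global ordering (one entry per local row/column):

      Mat B;
      ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
      ...  use B  ...
      ierr = MatDestroy(&B);CHKERRQ(ierr);
*/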
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1655 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1656 {
1657   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1658   PetscErrorCode ierr;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1662   if (ghosts) *ghosts = aij->garray;
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1668 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1669 {
1670   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1671   Mat            A    = mat->A,B = mat->B;
1672   PetscErrorCode ierr;
1673   PetscReal      isend[5],irecv[5];
1674 
1675   PetscFunctionBegin;
1676   info->block_size = 1.0;
1677   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1678 
1679   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1680   isend[3] = info->memory;  isend[4] = info->mallocs;
1681 
1682   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1683 
1684   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1685   isend[3] += info->memory;  isend[4] += info->mallocs;
1686   if (flag == MAT_LOCAL) {
1687     info->nz_used      = isend[0];
1688     info->nz_allocated = isend[1];
1689     info->nz_unneeded  = isend[2];
1690     info->memory       = isend[3];
1691     info->mallocs      = isend[4];
1692   } else if (flag == MAT_GLOBAL_MAX) {
1693     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1694 
1695     info->nz_used      = irecv[0];
1696     info->nz_allocated = irecv[1];
1697     info->nz_unneeded  = irecv[2];
1698     info->memory       = irecv[3];
1699     info->mallocs      = irecv[4];
1700   } else if (flag == MAT_GLOBAL_SUM) {
1701     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1702 
1703     info->nz_used      = irecv[0];
1704     info->nz_allocated = irecv[1];
1705     info->nz_unneeded  = irecv[2];
1706     info->memory       = irecv[3];
1707     info->mallocs      = irecv[4];
1708   }
1709   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1710   info->fill_ratio_needed = 0;
1711   info->factor_mallocs    = 0;
1712   PetscFunctionReturn(0);
1713 }
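/*
   Example (a sketch): query the assembled parallel matrix for its global nonzero count

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/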
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatSetOption_MPIAIJ"
1717 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1718 {
1719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1720   PetscErrorCode ierr;
1721 
1722   PetscFunctionBegin;
1723   switch (op) {
1724   case MAT_NEW_NONZERO_LOCATIONS:
1725   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1726   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1727   case MAT_KEEP_NONZERO_PATTERN:
1728   case MAT_NEW_NONZERO_LOCATION_ERR:
1729   case MAT_USE_INODES:
1730   case MAT_IGNORE_ZERO_ENTRIES:
1731     MatCheckPreallocated(A,1);
1732     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1733     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_ROW_ORIENTED:
1736     MatCheckPreallocated(A,1);
1737     a->roworiented = flg;
1738 
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1741     break;
1742   case MAT_NEW_DIAGONALS:
1743     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1744     break;
1745   case MAT_IGNORE_OFF_PROC_ENTRIES:
1746     a->donotstash = flg;
1747     break;
1748   case MAT_SPD:
1749     A->spd_set = PETSC_TRUE;
1750     A->spd     = flg;
1751     if (flg) {
1752       A->symmetric                  = PETSC_TRUE;
1753       A->structurally_symmetric     = PETSC_TRUE;
1754       A->symmetric_set              = PETSC_TRUE;
1755       A->structurally_symmetric_set = PETSC_TRUE;
1756     }
1757     break;
1758   case MAT_SYMMETRIC:
1759     MatCheckPreallocated(A,1);
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_STRUCTURALLY_SYMMETRIC:
1763     MatCheckPreallocated(A,1);
1764     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_HERMITIAN:
1767     MatCheckPreallocated(A,1);
1768     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_SYMMETRY_ETERNAL:
1771     MatCheckPreallocated(A,1);
1772     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1773     break;
1774   default:
1775     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1776   }
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatGetRow_MPIAIJ"
1782 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1783 {
1784   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1785   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1786   PetscErrorCode ierr;
1787   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1788   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1789   PetscInt       *cmap,*idx_p;
1790 
1791   PetscFunctionBegin;
1792   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1793   mat->getrowactive = PETSC_TRUE;
1794 
1795   if (!mat->rowvalues && (idx || v)) {
1796     /*
1797         allocate enough space to hold information from the longest row.
1798     */
1799     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1800     PetscInt   max = 1,tmp;
1801     for (i=0; i<matin->rmap->n; i++) {
1802       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1803       if (max < tmp) max = tmp;
1804     }
1805     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1806   }
1807 
1808   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1809   lrow = row - rstart;
1810 
1811   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1812   if (!v)   {pvA = 0; pvB = 0;}
1813   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1814   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1815   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1816   nztot = nzA + nzB;
1817 
1818   cmap = mat->garray;
1819   if (v  || idx) {
1820     if (nztot) {
1821       /* Sort by increasing column numbers, assuming A and B already sorted */
1822       PetscInt imark = -1;
1823       if (v) {
1824         *v = v_p = mat->rowvalues;
1825         for (i=0; i<nzB; i++) {
1826           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1827           else break;
1828         }
1829         imark = i;
1830         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1831         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1832       }
1833       if (idx) {
1834         *idx = idx_p = mat->rowindices;
1835         if (imark > -1) {
1836           for (i=0; i<imark; i++) {
1837             idx_p[i] = cmap[cworkB[i]];
1838           }
1839         } else {
1840           for (i=0; i<nzB; i++) {
1841             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1842             else break;
1843           }
1844           imark = i;
1845         }
1846         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1847         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1848       }
1849     } else {
1850       if (idx) *idx = 0;
1851       if (v)   *v   = 0;
1852     }
1853   }
1854   *nz  = nztot;
1855   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1856   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1862 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1863 {
1864   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1865 
1866   PetscFunctionBegin;
1867   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1868   aij->getrowactive = PETSC_FALSE;
1869   PetscFunctionReturn(0);
1870 }
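/*
   Typical caller-side pattern (a sketch): each MatGetRow() on a locally owned row must be
   matched by MatRestoreRow() before the next row is requested, because the returned values
   and indices point into the single work buffer allocated in MatGetRow_MPIAIJ() above.

      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscInt          ncols,rstart,rend,row;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use ncols, cols, vals ...
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/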
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatNorm_MPIAIJ"
1874 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1875 {
1876   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1877   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1878   PetscErrorCode ierr;
1879   PetscInt       i,j,cstart = mat->cmap->rstart;
1880   PetscReal      sum = 0.0;
1881   MatScalar      *v;
1882 
1883   PetscFunctionBegin;
1884   if (aij->size == 1) {
1885     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1886   } else {
1887     if (type == NORM_FROBENIUS) {
1888       v = amat->a;
1889       for (i=0; i<amat->nz; i++) {
1890         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1891       }
1892       v = bmat->a;
1893       for (i=0; i<bmat->nz; i++) {
1894         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1895       }
1896       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1897       *norm = PetscSqrtReal(*norm);
1898       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1899     } else if (type == NORM_1) { /* max column norm */
1900       PetscReal *tmp,*tmp2;
1901       PetscInt  *jj,*garray = aij->garray;
1902       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1903       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1904       *norm = 0.0;
1905       v     = amat->a; jj = amat->j;
1906       for (j=0; j<amat->nz; j++) {
1907         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1908       }
1909       v = bmat->a; jj = bmat->j;
1910       for (j=0; j<bmat->nz; j++) {
1911         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1912       }
1913       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1914       for (j=0; j<mat->cmap->N; j++) {
1915         if (tmp2[j] > *norm) *norm = tmp2[j];
1916       }
1917       ierr = PetscFree(tmp);CHKERRQ(ierr);
1918       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1919       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1920     } else if (type == NORM_INFINITY) { /* max row norm */
1921       PetscReal ntemp = 0.0;
1922       for (j=0; j<aij->A->rmap->n; j++) {
1923         v   = amat->a + amat->i[j];
1924         sum = 0.0;
1925         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1926           sum += PetscAbsScalar(*v); v++;
1927         }
1928         v = bmat->a + bmat->i[j];
1929         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1930           sum += PetscAbsScalar(*v); v++;
1931         }
1932         if (sum > ntemp) ntemp = sum;
1933       }
1934       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1935       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1936     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatTranspose_MPIAIJ"
1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1944 {
1945   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1946   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1947   PetscErrorCode ierr;
1948   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1949   PetscInt       cstart = A->cmap->rstart,ncol;
1950   Mat            B;
1951   MatScalar      *array;
1952 
1953   PetscFunctionBegin;
1954   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1955 
1956   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1957   ai = Aloc->i; aj = Aloc->j;
1958   bi = Bloc->i; bj = Bloc->j;
1959   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1960     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1961     PetscSFNode          *oloc;
1962     PETSC_UNUSED PetscSF sf;
1963 
1964     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1965     /* compute d_nnz for preallocation */
1966     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1967     for (i=0; i<ai[ma]; i++) {
1968       d_nnz[aj[i]]++;
1969       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1970     }
1971     /* compute local off-diagonal contributions */
1972     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1973     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1974     /* map those to global */
1975     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1976     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1977     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1978     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1979     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1980     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1982 
1983     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1984     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1985     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1986     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1987     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1988     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1989   } else {
1990     B    = *matout;
1991     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1992     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1993   }
1994 
1995   /* copy over the A part */
1996   array = Aloc->a;
1997   row   = A->rmap->rstart;
1998   for (i=0; i<ma; i++) {
1999     ncol = ai[i+1]-ai[i];
2000     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2001     row++;
2002     array += ncol; aj += ncol;
2003   }
2004   aj = Aloc->j;
2005   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2006 
2007   /* copy over the B part */
2008   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2009   array = Bloc->a;
2010   row   = A->rmap->rstart;
2011   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2012   cols_tmp = cols;
2013   for (i=0; i<mb; i++) {
2014     ncol = bi[i+1]-bi[i];
2015     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2016     row++;
2017     array += ncol; cols_tmp += ncol;
2018   }
2019   ierr = PetscFree(cols);CHKERRQ(ierr);
2020 
2021   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2024     *matout = B;
2025   } else {
2026     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
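/*
   Caller-side sketch (illustrative): out-of-place transpose using the preallocation
   computed above

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ...  use At  ...
      ierr = MatDestroy(&At);CHKERRQ(ierr);
*/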
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2034 {
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            a    = aij->A,b = aij->B;
2037   PetscErrorCode ierr;
2038   PetscInt       s1,s2,s3;
2039 
2040   PetscFunctionBegin;
2041   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2042   if (rr) {
2043     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2044     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2045     /* Overlap communication with computation. */
2046     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2047   }
2048   if (ll) {
2049     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2050     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2051     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2052   }
2053   /* scale the diagonal block */
2054   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2055 
2056   if (rr) {
2057     /* Do a scatter end and then right scale the off-diagonal block */
2058     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2059     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2067 {
2068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2069   PetscErrorCode ierr;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatEqual_MPIAIJ"
2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2079 {
2080   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2081   Mat            a,b,c,d;
2082   PetscBool      flg;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   a = matA->A; b = matA->B;
2087   c = matB->A; d = matB->B;
2088 
2089   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2090   if (flg) {
2091     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2092   }
2093   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatCopy_MPIAIJ"
2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2100 {
2101   PetscErrorCode ierr;
2102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2103   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2104 
2105   PetscFunctionBegin;
2106   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2107   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2108     /* because of the column compression in the off-process part of the matrix a->B,
2109        the number of columns in a->B and b->B may be different, hence we cannot call
2110        MatCopy() directly on the two parts. If need be, a more efficient copy than
2111        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2112        then copying the submatrices */
2113     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2114   } else {
2115     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2116     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
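/*
   Caller-side sketch (illustrative): the fast path above is taken only when both matrices
   are MPIAIJ with an identical nonzero pattern, for example

      Mat B;
      ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/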
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetUp_MPIAIJ"
2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2124 {
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 /*
2133    Computes the number of nonzeros per row needed for preallocation when X and Y
2134    have different nonzero structure.
2135 */
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2138 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2139 {
2140   PetscInt       i,j,k,nzx,nzy;
2141 
2142   PetscFunctionBegin;
2143   /* Set the number of nonzeros in the new matrix */
2144   for (i=0; i<m; i++) {
2145     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2146     nzx = xi[i+1] - xi[i];
2147     nzy = yi[i+1] - yi[i];
2148     nnz[i] = 0;
2149     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2150       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2151       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2152       nnz[i]++;
2153     }
2154     for (; k<nzy; k++) nnz[i]++;
2155   }
2156   PetscFunctionReturn(0);
2157 }
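/*
   Small worked example (illustrative): for a row where X has global columns {0,3,7} and
   Y has global columns {3,5}, the merge above visits 0,3,5,7 and counts nnz = 4; the
   shared column 3 is counted only once because the duplicate is skipped.
*/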
2158 
2159 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2162 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2163 {
2164   PetscErrorCode ierr;
2165   PetscInt       m = Y->rmap->N;
2166   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2167   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2168 
2169   PetscFunctionBegin;
2170   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatAXPY_MPIAIJ"
2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2177 {
2178   PetscErrorCode ierr;
2179   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2180   PetscBLASInt   bnz,one=1;
2181   Mat_SeqAIJ     *x,*y;
2182 
2183   PetscFunctionBegin;
2184   if (str == SAME_NONZERO_PATTERN) {
2185     PetscScalar alpha = a;
2186     x    = (Mat_SeqAIJ*)xx->A->data;
2187     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2188     y    = (Mat_SeqAIJ*)yy->A->data;
2189     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2190     x    = (Mat_SeqAIJ*)xx->B->data;
2191     y    = (Mat_SeqAIJ*)yy->B->data;
2192     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2193     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2194     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2195   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2196     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2197   } else {
2198     Mat      B;
2199     PetscInt *nnz_d,*nnz_o;
2200     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2201     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2202     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2203     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2204     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2205     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2206     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2207     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2209     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2210     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2211     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2212     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2214   }
2215   PetscFunctionReturn(0);
2216 }
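/*
   Caller-side sketch (illustrative): Y <- a*X + Y.  Passing SAME_NONZERO_PATTERN uses the
   fast BLAS axpy path above, while DIFFERENT_NONZERO_PATTERN triggers the
   preallocate-and-rebuild branch.

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/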
2217 
2218 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatConjugate_MPIAIJ"
2222 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2223 {
2224 #if defined(PETSC_USE_COMPLEX)
2225   PetscErrorCode ierr;
2226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2227 
2228   PetscFunctionBegin;
2229   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2230   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2231 #else
2232   PetscFunctionBegin;
2233 #endif
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatRealPart_MPIAIJ"
2239 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243 
2244   PetscFunctionBegin;
2245   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2246   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2252 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2253 {
2254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2255   PetscErrorCode ierr;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2259   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2265 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269   PetscInt       i,*idxb = 0;
2270   PetscScalar    *va,*vb;
2271   Vec            vtmp;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2275   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2276   if (idx) {
2277     for (i=0; i<A->rmap->n; i++) {
2278       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2279     }
2280   }
2281 
2282   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2283   if (idx) {
2284     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2285   }
2286   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2287   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2288 
2289   for (i=0; i<A->rmap->n; i++) {
2290     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2291       va[i] = vb[i];
2292       if (idx) idx[i] = a->garray[idxb[i]];
2293     }
2294   }
2295 
2296   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2298   ierr = PetscFree(idxb);CHKERRQ(ierr);
2299   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2305 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2306 {
2307   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2308   PetscErrorCode ierr;
2309   PetscInt       i,*idxb = 0;
2310   PetscScalar    *va,*vb;
2311   Vec            vtmp;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2315   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2316   if (idx) {
2317     for (i=0; i<A->rmap->n; i++) {
2318       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2319     }
2320   }
2321 
2322   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2323   if (idx) {
2324     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2325   }
2326   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2327   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2328 
2329   for (i=0; i<A->rmap->n; i++) {
2330     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2331       va[i] = vb[i];
2332       if (idx) idx[i] = a->garray[idxb[i]];
2333     }
2334   }
2335 
2336   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2337   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2338   ierr = PetscFree(idxb);CHKERRQ(ierr);
2339   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2348   PetscInt       n      = A->rmap->n;
2349   PetscInt       cstart = A->cmap->rstart;
2350   PetscInt       *cmap  = mat->garray;
2351   PetscInt       *diagIdx, *offdiagIdx;
2352   Vec            diagV, offdiagV;
2353   PetscScalar    *a, *diagA, *offdiagA;
2354   PetscInt       r;
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2361   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2364   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2365   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2366   for (r = 0; r < n; ++r) {
2367     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2368       a[r]   = diagA[r];
2369       idx[r] = cstart + diagIdx[r];
2370     } else {
2371       a[r]   = offdiagA[r];
2372       idx[r] = cmap[offdiagIdx[r]];
2373     }
2374   }
2375   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2378   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2379   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2380   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2386 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2389   PetscInt       n      = A->rmap->n;
2390   PetscInt       cstart = A->cmap->rstart;
2391   PetscInt       *cmap  = mat->garray;
2392   PetscInt       *diagIdx, *offdiagIdx;
2393   Vec            diagV, offdiagV;
2394   PetscScalar    *a, *diagA, *offdiagA;
2395   PetscInt       r;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2402   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2403   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   for (r = 0; r < n; ++r) {
2408     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       idx[r] = cstart + diagIdx[r];
2411     } else {
2412       a[r]   = offdiagA[r];
2413       idx[r] = cmap[offdiagIdx[r]];
2414     }
2415   }
2416   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2427 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2428 {
2429   PetscErrorCode ierr;
2430   Mat            *dummy;
2431 
2432   PetscFunctionBegin;
2433   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2434   *newmat = *dummy;
2435   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2441 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2442 {
2443   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2444   PetscErrorCode ierr;
2445 
2446   PetscFunctionBegin;
2447   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2448   A->factorerrortype = a->A->factorerrortype;
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2454 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2455 {
2456   PetscErrorCode ierr;
2457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2458 
2459   PetscFunctionBegin;
2460   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2461   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2462   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2463   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2469 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2470 {
2471   PetscFunctionBegin;
2472   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2473   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2479 /*@
2480    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2481 
2482    Collective on Mat
2483 
2484    Input Parameters:
2485 +    A - the matrix
2486 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2487 
2488  Level: advanced
2489 
2490 @*/
2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2492 {
2493   PetscErrorCode       ierr;
2494 
2495   PetscFunctionBegin;
2496   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
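/*
   Example usage (a sketch, assuming 'A' is a MATMPIAIJ matrix):

      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or equivalently from the options database with -mat_increase_overlap_scalable, which is
   registered in MatSetFromOptions_MPIAIJ() below.
*/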
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2502 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2503 {
2504   PetscErrorCode       ierr;
2505   PetscBool            sc = PETSC_FALSE,flg;
2506 
2507   PetscFunctionBegin;
2508   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2509   ierr = PetscObjectOptionsBegin((PetscObject)A);
2510     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2511     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2512     if (flg) {
2513       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2514     }
2515   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatShift_MPIAIJ"
2521 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2522 {
2523   PetscErrorCode ierr;
2524   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2525   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2526 
2527   PetscFunctionBegin;
2528   if (!Y->preallocated) {
2529     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2530   } else if (!aij->nz) {
2531     PetscInt nonew = aij->nonew;
2532     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2533     aij->nonew = nonew;
2534   }
2535   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2541 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2542 {
2543   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2544   PetscErrorCode ierr;
2545 
2546   PetscFunctionBegin;
2547   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2548   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2549   if (d) {
2550     PetscInt rstart;
2551     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2552     *d += rstart;
2553 
2554   }
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2561                                        MatGetRow_MPIAIJ,
2562                                        MatRestoreRow_MPIAIJ,
2563                                        MatMult_MPIAIJ,
2564                                 /* 4*/ MatMultAdd_MPIAIJ,
2565                                        MatMultTranspose_MPIAIJ,
2566                                        MatMultTransposeAdd_MPIAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*10*/ 0,
2571                                        0,
2572                                        0,
2573                                        MatSOR_MPIAIJ,
2574                                        MatTranspose_MPIAIJ,
2575                                 /*15*/ MatGetInfo_MPIAIJ,
2576                                        MatEqual_MPIAIJ,
2577                                        MatGetDiagonal_MPIAIJ,
2578                                        MatDiagonalScale_MPIAIJ,
2579                                        MatNorm_MPIAIJ,
2580                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2581                                        MatAssemblyEnd_MPIAIJ,
2582                                        MatSetOption_MPIAIJ,
2583                                        MatZeroEntries_MPIAIJ,
2584                                 /*24*/ MatZeroRows_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        MatGetDiagonalBlock_MPIAIJ,
2593                                        0,
2594                                 /*34*/ MatDuplicate_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*39*/ MatAXPY_MPIAIJ,
2600                                        MatGetSubMatrices_MPIAIJ,
2601                                        MatIncreaseOverlap_MPIAIJ,
2602                                        MatGetValues_MPIAIJ,
2603                                        MatCopy_MPIAIJ,
2604                                 /*44*/ MatGetRowMax_MPIAIJ,
2605                                        MatScale_MPIAIJ,
2606                                        MatShift_MPIAIJ,
2607                                        MatDiagonalSet_MPIAIJ,
2608                                        MatZeroRowsColumns_MPIAIJ,
2609                                 /*49*/ MatSetRandom_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2615                                        0,
2616                                        MatSetUnfactored_MPIAIJ,
2617                                        MatPermute_MPIAIJ,
2618                                        0,
2619                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2620                                        MatDestroy_MPIAIJ,
2621                                        MatView_MPIAIJ,
2622                                        0,
2623                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2624                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2625                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2630                                        MatGetRowMinAbs_MPIAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*75*/ MatFDColoringApply_AIJ,
2636                                        MatSetFromOptions_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        MatFindZeroDiagonals_MPIAIJ,
2640                                 /*80*/ 0,
2641                                        0,
2642                                        0,
2643                                 /*83*/ MatLoad_MPIAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2650                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2651                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2652                                        MatPtAP_MPIAIJ_MPIAIJ,
2653                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2654                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*99*/ 0,
2660                                        0,
2661                                        0,
2662                                        MatConjugate_MPIAIJ,
2663                                        0,
2664                                 /*104*/MatSetValuesRow_MPIAIJ,
2665                                        MatRealPart_MPIAIJ,
2666                                        MatImaginaryPart_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                 /*109*/0,
2670                                        0,
2671                                        MatGetRowMin_MPIAIJ,
2672                                        0,
2673                                        MatMissingDiagonal_MPIAIJ,
2674                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2675                                        0,
2676                                        MatGetGhosts_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                 /*119*/0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatGetMultiProcBlock_MPIAIJ,
2684                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2685                                        MatGetColumnNorms_MPIAIJ,
2686                                        MatInvertBlockDiagonal_MPIAIJ,
2687                                        0,
2688                                        MatGetSubMatricesMPI_MPIAIJ,
2689                                 /*129*/0,
2690                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2691                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2692                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2693                                        0,
2694                                 /*134*/0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                 /*139*/0,
2700                                        0,
2701                                        0,
2702                                        MatFDColoringSetUp_MPIXAIJ,
2703                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2704                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2705 };
2706 
2707 /* ----------------------------------------------------------------------------------------*/
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2711 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2712 {
2713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2718   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2724 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2725 {
2726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2731   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 #undef __FUNCT__
2736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2737 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2738 {
2739   Mat_MPIAIJ     *b;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2744   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2745   b = (Mat_MPIAIJ*)B->data;
2746 
2747 #if defined(PETSC_USE_CTABLE)
2748   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2749 #else
2750   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2751 #endif
2752   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2753   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2754   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2755 
2756   /* Because the off-diagonal block B will have been resized, we simply destroy it and create a new one each time */
2757   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2758   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2759   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2760   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2761   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2762   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2763 
2764   if (!B->preallocated) {
2765     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2766     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2767     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2768     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2769     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2770   }
2771 
2772   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2773   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2774   B->preallocated  = PETSC_TRUE;
2775   B->was_assembled = PETSC_FALSE;
2776   B->assembled     = PETSC_FALSE;
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2782 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2783 {
2784   Mat            mat;
2785   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2786   PetscErrorCode ierr;
2787 
2788   PetscFunctionBegin;
2789   *newmat = 0;
2790   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2791   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2792   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2793   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2794   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2795   a       = (Mat_MPIAIJ*)mat->data;
2796 
2797   mat->factortype   = matin->factortype;
2798   mat->assembled    = PETSC_TRUE;
2799   mat->insertmode   = NOT_SET_VALUES;
2800   mat->preallocated = PETSC_TRUE;
2801 
2802   a->size         = oldmat->size;
2803   a->rank         = oldmat->rank;
2804   a->donotstash   = oldmat->donotstash;
2805   a->roworiented  = oldmat->roworiented;
2806   a->rowindices   = 0;
2807   a->rowvalues    = 0;
2808   a->getrowactive = PETSC_FALSE;
2809 
2810   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2811   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2812 
2813   if (oldmat->colmap) {
2814 #if defined(PETSC_USE_CTABLE)
2815     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2816 #else
2817     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2818     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2819     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2820 #endif
2821   } else a->colmap = 0;
2822   if (oldmat->garray) {
2823     PetscInt len;
2824     len  = oldmat->B->cmap->n;
2825     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2826     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2827     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2828   } else a->garray = 0;
2829 
2830   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2831   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2832   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2833   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2834   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2835   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2836   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2837   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2838   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2839   *newmat = mat;
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 
2844 
2845 #undef __FUNCT__
2846 #define __FUNCT__ "MatLoad_MPIAIJ"
2847 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2848 {
2849   PetscScalar    *vals,*svals;
2850   MPI_Comm       comm;
2851   PetscErrorCode ierr;
2852   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2853   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2854   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2855   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2856   PetscInt       cend,cstart,n,*rowners;
2857   int            fd;
2858   PetscInt       bs = newMat->rmap->bs;
2859 
2860   PetscFunctionBegin;
2861   /* force binary viewer to load .info file if it has not yet done so */
2862   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2863   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2864   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2865   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2866   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2867   if (!rank) {
2868     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2869     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2870     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2871   }
2872 
2873   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2874   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2875   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2876   if (bs < 0) bs = 1;
2877 
2878   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2879   M    = header[1]; N = header[2];
2880 
2881   /* If global sizes are set, check if they are consistent with those given in the file */
2882   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2883   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2884 
2885   /* determine ownership of all (block) rows */
2886   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2887   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2888   else m = newMat->rmap->n; /* Set by user */
2889 
2890   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2891   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2892 
2893   /* First process needs enough room for process with most rows */
2894   if (!rank) {
2895     mmax = rowners[1];
2896     for (i=2; i<=size; i++) {
2897       mmax = PetscMax(mmax, rowners[i]);
2898     }
2899   } else mmax = -1;             /* unused, but compilers complain */
2900 
2901   rowners[0] = 0;
2902   for (i=2; i<=size; i++) {
2903     rowners[i] += rowners[i-1];
2904   }
2905   rstart = rowners[rank];
2906   rend   = rowners[rank+1];
2907 
2908   /* distribute row lengths to all processors */
2909   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2912     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2913     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2914     for (j=0; j<m; j++) {
2915       procsnz[0] += ourlens[j];
2916     }
2917     for (i=1; i<size; i++) {
2918       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2919       /* calculate the number of nonzeros on each processor */
2920       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2921         procsnz[i] += rowlengths[j];
2922       }
2923       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2924     }
2925     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2926   } else {
2927     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2928   }
2929 
2930   if (!rank) {
2931     /* determine max buffer needed and allocate it */
2932     maxnz = 0;
2933     for (i=0; i<size; i++) {
2934       maxnz = PetscMax(maxnz,procsnz[i]);
2935     }
2936     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2937 
2938     /* read in my part of the matrix column indices  */
2939     nz   = procsnz[0];
2940     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2941     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2942 
2943     /* read in everyone else's part and ship it off */
2944     for (i=1; i<size; i++) {
2945       nz   = procsnz[i];
2946       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2947       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(cols);CHKERRQ(ierr);
2950   } else {
2951     /* determine buffer space needed for message */
2952     nz = 0;
2953     for (i=0; i<m; i++) {
2954       nz += ourlens[i];
2955     }
2956     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2957 
2958     /* receive message of column indices*/
2959     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2960   }
2961 
2962   /* determine column ownership if matrix is not square */
2963   if (N != M) {
2964     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2965     else n = newMat->cmap->n;
2966     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2967     cstart = cend - n;
2968   } else {
2969     cstart = rstart;
2970     cend   = rend;
2971     n      = cend - cstart;
2972   }
2973 
2974   /* loop over local rows, determining number of off diagonal entries */
2975   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2976   jj   = 0;
2977   for (i=0; i<m; i++) {
2978     for (j=0; j<ourlens[i]; j++) {
2979       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2980       jj++;
2981     }
2982   }
2983 
2984   for (i=0; i<m; i++) {
2985     ourlens[i] -= offlens[i];
2986   }
2987   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2988 
2989   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2990 
2991   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2992 
2993   for (i=0; i<m; i++) {
2994     ourlens[i] += offlens[i];
2995   }
2996 
2997   if (!rank) {
2998     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2999 
3000     /* read in my part of the matrix numerical values  */
3001     nz   = procsnz[0];
3002     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3003 
3004     /* insert into matrix */
3005     jj      = rstart;
3006     smycols = mycols;
3007     svals   = vals;
3008     for (i=0; i<m; i++) {
3009       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3010       smycols += ourlens[i];
3011       svals   += ourlens[i];
3012       jj++;
3013     }
3014 
3015     /* read in other processors and ship out */
3016     for (i=1; i<size; i++) {
3017       nz   = procsnz[i];
3018       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3019       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3020     }
3021     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3022   } else {
3023     /* receive numeric values */
3024     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3025 
3026     /* receive message of values*/
3027     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3028 
3029     /* insert into matrix */
3030     jj      = rstart;
3031     smycols = mycols;
3032     svals   = vals;
3033     for (i=0; i<m; i++) {
3034       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3035       smycols += ourlens[i];
3036       svals   += ourlens[i];
3037       jj++;
3038     }
3039   }
3040   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3041   ierr = PetscFree(vals);CHKERRQ(ierr);
3042   ierr = PetscFree(mycols);CHKERRQ(ierr);
3043   ierr = PetscFree(rowners);CHKERRQ(ierr);
3044   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3045   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3051 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3052 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3053 {
3054   PetscErrorCode ierr;
3055   IS             iscol_local;
3056   PetscInt       csize;
3057 
3058   PetscFunctionBegin;
3059   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3060   if (call == MAT_REUSE_MATRIX) {
3061     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3062     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3063   } else {
3064     /* check if we are grabbing all columns*/
3065     PetscBool    isstride;
3066     PetscMPIInt  lisstride = 0,gisstride;
3067     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3068     if (isstride) {
3069       PetscInt  start,len,mstart,mlen;
3070       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3071       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3072       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3073       if (mstart == start && mlen-mstart == len) lisstride = 1;
3074     }
3075     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3076     if (gisstride) {
3077       PetscInt N;
3078       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3079       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3080       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3081       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3082     } else {
3083       PetscInt cbs;
3084       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3085       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3086       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3087     }
3088   }
3089   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3090   if (call == MAT_INITIAL_MATRIX) {
3091     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3092     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3093   }
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3100 /*
3101     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3102   submatrix on each process, and then the final result assembled by concatenating those local matrices.
3103   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3104 
3105   Note: This requires a sequential iscol with all indices.
3106 */
3107 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3108 {
3109   PetscErrorCode ierr;
3110   PetscMPIInt    rank,size;
3111   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3112   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3113   PetscBool      allcolumns, colflag;
3114   Mat            M,Mreuse;
3115   MatScalar      *vwork,*aa;
3116   MPI_Comm       comm;
3117   Mat_SeqAIJ     *aij;
3118 
3119   PetscFunctionBegin;
3120   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3121   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3122   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3123 
3124   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3125   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3126   if (colflag && ncol == mat->cmap->N) {
3127     allcolumns = PETSC_TRUE;
3128     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3129   } else {
3130     allcolumns = PETSC_FALSE;
3131   }
3132   if (call ==  MAT_REUSE_MATRIX) {
3133     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3134     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3135     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3136   } else {
3137     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3138   }
3139 
3140   /*
3141       m - number of local rows
3142       n - number of columns (same on all processors)
3143       rstart - first row in new global matrix generated
3144   */
3145   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3146   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3147   if (call == MAT_INITIAL_MATRIX) {
3148     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3149     ii  = aij->i;
3150     jj  = aij->j;
3151 
3152     /*
3153         Determine the number of non-zeros in the diagonal and off-diagonal
3154         portions of the matrix in order to do correct preallocation
3155     */
3156 
3157     /* first get start and end of "diagonal" columns */
3158     if (csize == PETSC_DECIDE) {
3159       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3160       if (mglobal == n) { /* square matrix */
3161         nlocal = m;
3162       } else {
3163         nlocal = n/size + ((n % size) > rank);
3164       }
3165     } else {
3166       nlocal = csize;
3167     }
3168     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3169     rstart = rend - nlocal;
3170     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3171 
3172     /* next, compute all the lengths */
3173     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3174     olens = dlens + m;
3175     for (i=0; i<m; i++) {
3176       jend = ii[i+1] - ii[i];
3177       olen = 0;
3178       dlen = 0;
3179       for (j=0; j<jend; j++) {
3180         if (*jj < rstart || *jj >= rend) olen++;
3181         else dlen++;
3182         jj++;
3183       }
3184       olens[i] = olen;
3185       dlens[i] = dlen;
3186     }
3187     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3188     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3189     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3190     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3191     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3192     ierr = PetscFree(dlens);CHKERRQ(ierr);
3193   } else {
3194     PetscInt ml,nl;
3195 
3196     M    = *newmat;
3197     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3198     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3199     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3200     /*
3201          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3202        rather than the slower MatSetValues().
3203     */
3204     M->was_assembled = PETSC_TRUE;
3205     M->assembled     = PETSC_FALSE;
3206   }
3207   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3208   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3209   ii   = aij->i;
3210   jj   = aij->j;
3211   aa   = aij->a;
3212   for (i=0; i<m; i++) {
3213     row   = rstart + i;
3214     nz    = ii[i+1] - ii[i];
3215     cwork = jj;     jj += nz;
3216     vwork = aa;     aa += nz;
3217     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3218   }
3219 
3220   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3221   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3222   *newmat = M;
3223 
3224   /* save submatrix used in processor for next request */
3225   if (call ==  MAT_INITIAL_MATRIX) {
3226     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3227     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3228   }
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 #undef __FUNCT__
3233 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3234 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3235 {
3236   PetscInt       m,cstart, cend,j,nnz,i,d;
3237   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3238   const PetscInt *JJ;
3239   PetscScalar    *values;
3240   PetscErrorCode ierr;
3241 
3242   PetscFunctionBegin;
3243   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3244 
3245   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3246   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3247   m      = B->rmap->n;
3248   cstart = B->cmap->rstart;
3249   cend   = B->cmap->rend;
3250   rstart = B->rmap->rstart;
3251 
3252   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3253 
3254 #if defined(PETSC_USE_DEBUG)
3255   for (i=0; i<m; i++) {
3256     nnz = Ii[i+1]- Ii[i];
3257     JJ  = J + Ii[i];
3258     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3259     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3260     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3261   }
3262 #endif
3263 
3264   for (i=0; i<m; i++) {
3265     nnz     = Ii[i+1]- Ii[i];
3266     JJ      = J + Ii[i];
3267     nnz_max = PetscMax(nnz_max,nnz);
3268     d       = 0;
3269     for (j=0; j<nnz; j++) {
3270       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3271     }
3272     d_nnz[i] = d;
3273     o_nnz[i] = nnz - d;
3274   }
3275   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3276   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3277 
3278   if (v) values = (PetscScalar*)v;
3279   else {
3280     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3281   }
3282 
3283   for (i=0; i<m; i++) {
3284     ii   = i + rstart;
3285     nnz  = Ii[i+1]- Ii[i];
3286     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3287   }
3288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290 
3291   if (!v) {
3292     ierr = PetscFree(values);CHKERRQ(ierr);
3293   }
3294   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3295   PetscFunctionReturn(0);
3296 }
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3300 /*@
3301    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3302    (the default parallel PETSc format).
3303 
3304    Collective on MPI_Comm
3305 
3306    Input Parameters:
3307 +  B - the matrix
3308 .  i - the indices into j for the start of each local row (starts with zero)
3309 .  j - the column indices for each local row (starts with zero)
3310 -  v - optional values in the matrix
3311 
3312    Level: developer
3313 
3314    Notes:
3315        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3316      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3317      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3318 
3319        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3320 
3321        The format used for the sparse matrix input is equivalent to a
3322     row-major ordering, i.e., for the following matrix, the input data expected is
3323     as shown below:
3324 
3325 $        1 0 0
3326 $        2 0 3     P0
3327 $       -------
3328 $        4 5 6     P1
3329 $
3330 $     Process0 [P0]: rows_owned=[0,1]
3331 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3332 $        j =  {0,0,2}  [size = 3]
3333 $        v =  {1,2,3}  [size = 3]
3334 $
3335 $     Process1 [P1]: rows_owned=[2]
3336 $        i =  {0,3}    [size = nrow+1  = 1+1]
3337 $        j =  {0,1,2}  [size = 3]
3338 $        v =  {4,5,6}  [size = 3]
3339 
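   As a rough calling sketch (illustrative only, not taken from a PETSc example; it assumes
   each process has already built its local CSR arrays i, j, and v as above and knows its
   local/global sizes m, n, M, N):

.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);  /* for MATMPIAIJ this preallocates, inserts the values, and assembles */
.ve
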
3340 .keywords: matrix, aij, compressed row, sparse, parallel
3341 
3342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3343           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3344 @*/
3345 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3346 {
3347   PetscErrorCode ierr;
3348 
3349   PetscFunctionBegin;
3350   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3356 /*@C
3357    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3358    (the default parallel PETSc format).  For good matrix assembly performance
3359    the user should preallocate the matrix storage by setting the parameters
3360    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3361    performance can be increased by more than a factor of 50.
3362 
3363    Collective on MPI_Comm
3364 
3365    Input Parameters:
3366 +  B - the matrix
3367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3368            (same value is used for all local rows)
3369 .  d_nnz - array containing the number of nonzeros in the various rows of the
3370            DIAGONAL portion of the local submatrix (possibly different for each row)
3371            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3372            The size of this array is equal to the number of local rows, i.e 'm'.
3373            For matrices that will be factored, you must leave room for (and set)
3374            the diagonal entry even if it is zero.
3375 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3376            submatrix (same value is used for all local rows).
3377 -  o_nnz - array containing the number of nonzeros in the various rows of the
3378            OFF-DIAGONAL portion of the local submatrix (possibly different for
3379            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3380            structure. The size of this array is equal to the number
3381            of local rows, i.e 'm'.
3382 
3383    If the *_nnz parameter is given then the *_nz parameter is ignored.
3384 
3385    The AIJ format (also called the Yale sparse matrix format or
3386    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3387    storage.  The stored row and column indices begin with zero.
3388    See Users-Manual: ch_mat for details.
3389 
3390    The parallel matrix is partitioned such that the first m0 rows belong to
3391    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3392    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3393 
3394    The DIAGONAL portion of the local submatrix of a processor can be defined
3395    as the submatrix which is obtained by extracting the part corresponding to
3396    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3397    first row that belongs to the processor, r2 is the last row belonging to
3398    this processor, and c1-c2 is the range of indices of the local part of a
3399    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3400    common case of a square matrix, the row and column ranges are the same and
3401    the DIAGONAL part is also square. The remaining portion of the local
3402    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3403 
3404    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3405 
3406    You can call MatGetInfo() to get information on how effective the preallocation was;
3407    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3408    You can also run with the option -info and look for messages with the string
3409    malloc in them to see if additional memory allocation was needed.
3410 
3411    Example usage:
3412 
3413    Consider the following 8x8 matrix with 34 non-zero values that is
3414    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3415    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3416    as follows:
3417 
3418 .vb
3419             1  2  0  |  0  3  0  |  0  4
3420     Proc0   0  5  6  |  7  0  0  |  8  0
3421             9  0 10  | 11  0  0  | 12  0
3422     -------------------------------------
3423            13  0 14  | 15 16 17  |  0  0
3424     Proc1   0 18  0  | 19 20 21  |  0  0
3425             0  0  0  | 22 23  0  | 24  0
3426     -------------------------------------
3427     Proc2  25 26 27  |  0  0 28  | 29  0
3428            30  0  0  | 31 32 33  |  0 34
3429 .ve
3430 
3431    This can be represented as a collection of submatrices as:
3432 
3433 .vb
3434       A B C
3435       D E F
3436       G H I
3437 .ve
3438 
3439    Where the submatrices A,B,C are owned by proc0, D,E,F are
3440    owned by proc1, G,H,I are owned by proc2.
3441 
3442    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3443    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3444    The 'M','N' parameters are 8,8, and have the same values on all procs.
3445 
3446    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3447    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3448    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3449    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3450    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3451    matrix, and [DF] as another SeqAIJ matrix.
3452 
3453    When d_nz, o_nz parameters are specified, d_nz storage elements are
3454    allocated for every row of the local diagonal submatrix, and o_nz
3455    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3456    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3457    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3458    In this case, the values of d_nz,o_nz are:
3459 .vb
3460      proc0 : dnz = 2, o_nz = 2
3461      proc1 : dnz = 3, o_nz = 2
3462      proc2 : dnz = 1, o_nz = 4
3463 .ve
3464    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3465    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3466    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3467    34 values.
3468 
3469    When d_nnz, o_nnz parameters are specified, the storage is specified
3470    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3471    In the above case the values for d_nnz,o_nnz are:
3472 .vb
3473      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3474      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3475      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3476 .ve
3477    Here the space allocated is the sum of all the above values, i.e., 34, and
3478    hence pre-allocation is perfect.
3479 
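   As a rough sketch (illustrative only; error checking is omitted, and each process passes
   its own local sizes and nnz arrays), proc0 in the example above could preallocate with:

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* proc0 values from the table above */
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,3,3,8,8);                           /* m=3, n=3 locally; M=N=8 globally */
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
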
3480    Level: intermediate
3481 
3482 .keywords: matrix, aij, compressed row, sparse, parallel
3483 
3484 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3485           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3486 @*/
3487 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3488 {
3489   PetscErrorCode ierr;
3490 
3491   PetscFunctionBegin;
3492   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3493   PetscValidType(B,1);
3494   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3500 /*@
3501      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3502          in standard CSR format.
3503 
3504    Collective on MPI_Comm
3505 
3506    Input Parameters:
3507 +  comm - MPI communicator
3508 .  m - number of local rows (Cannot be PETSC_DECIDE)
3509 .  n - This value should be the same as the local size used in creating the
3510        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3511        calculated if N is given) For square matrices n is almost always m.
3512 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3513 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3514 .   i - row indices
3515 .   j - column indices
3516 -   a - matrix values
3517 
3518    Output Parameter:
3519 .   mat - the matrix
3520 
3521    Level: intermediate
3522 
3523    Notes:
3524        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3525      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3526      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3527 
3528        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3529 
3530        The format used for the sparse matrix input is equivalent to a
3531     row-major ordering, i.e., for the following matrix, the input data expected is
3532     as shown below:
3533 
3534 $        1 0 0
3535 $        2 0 3     P0
3536 $       -------
3537 $        4 5 6     P1
3538 $
3539 $     Process0 [P0]: rows_owned=[0,1]
3540 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3541 $        j =  {0,0,2}  [size = 3]
3542 $        v =  {1,2,3}  [size = 3]
3543 $
3544 $     Process1 [P1]: rows_owned=[2]
3545 $        i =  {0,3}    [size = nrow+1  = 1+1]
3546 $        j =  {0,1,2}  [size = 3]
3547 $        v =  {4,5,6}  [size = 3]
3548 
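   As a rough sketch (illustrative only; each process passes its own m and its own i, j, v
   arrays), the call made by process P0 above might be:

.vb
      Mat         A;
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar v[] = {1,2,3};
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
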
3549 .keywords: matrix, aij, compressed row, sparse, parallel
3550 
3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3552           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3553 @*/
3554 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3555 {
3556   PetscErrorCode ierr;
3557 
3558   PetscFunctionBegin;
3559   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3560   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3561   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3562   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3563   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3564   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3565   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 #undef __FUNCT__
3570 #define __FUNCT__ "MatCreateAIJ"
3571 /*@C
3572    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3573    (the default parallel PETSc format).  For good matrix assembly performance
3574    the user should preallocate the matrix storage by setting the parameters
3575    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3576    performance can be increased by more than a factor of 50.
3577 
3578    Collective on MPI_Comm
3579 
3580    Input Parameters:
3581 +  comm - MPI communicator
3582 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3583            This value should be the same as the local size used in creating the
3584            y vector for the matrix-vector product y = Ax.
3585 .  n - This value should be the same as the local size used in creating the
3586        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3587        calculated if N is given) For square matrices n is almost always m.
3588 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3589 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3590 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3591            (same value is used for all local rows)
3592 .  d_nnz - array containing the number of nonzeros in the various rows of the
3593            DIAGONAL portion of the local submatrix (possibly different for each row)
3594            or NULL, if d_nz is used to specify the nonzero structure.
3595            The size of this array is equal to the number of local rows, i.e 'm'.
3596 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3597            submatrix (same value is used for all local rows).
3598 -  o_nnz - array containing the number of nonzeros in the various rows of the
3599            OFF-DIAGONAL portion of the local submatrix (possibly different for
3600            each row) or NULL, if o_nz is used to specify the nonzero
3601            structure. The size of this array is equal to the number
3602            of local rows, i.e 'm'.
3603 
3604    Output Parameter:
3605 .  A - the matrix
3606 
3607    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3608    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3609    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3610 
3611    Notes:
3612    If the *_nnz parameter is given then the *_nz parameter is ignored.
3613 
3614    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3615    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3616    storage requirements for this matrix.
3617 
3618    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3619    processor then it must be used on all processors that share the object for
3620    that argument.
3621 
3622    The user MUST specify either the local or global matrix dimensions
3623    (possibly both).
3624 
3625    The parallel matrix is partitioned across processors such that the
3626    first m0 rows belong to process 0, the next m1 rows belong to
3627    process 1, the next m2 rows belong to process 2, etc., where
3628    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3629    values corresponding to an [m x N] submatrix.
3630 
3631    The columns are logically partitioned with the n0 columns belonging
3632    to the 0th partition, the next n1 columns belonging to the next
3633    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3634 
3635    The DIAGONAL portion of the local submatrix on any given processor
3636    is the submatrix corresponding to the rows and columns m,n
3637    owned by the given processor, i.e., the diagonal matrix on
3638    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3639    etc. The remaining portion of the local submatrix [m x (N-n)]
3640    constitutes the OFF-DIAGONAL portion. The example below better
3641    illustrates this concept.
3642 
3643    For a square global matrix we define each processor's diagonal portion
3644    to be its local rows and the corresponding columns (a square submatrix);
3645    each processor's off-diagonal portion encompasses the remainder of the
3646    local matrix (a rectangular submatrix).
3647 
3648    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3649 
3650    When calling this routine with a single process communicator, a matrix of
3651    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3652    type of communicator, use the construction mechanism:
3653      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3654 
3655    By default, this format uses inodes (identical nodes) when possible.
3656    We search for consecutive rows with the same nonzero structure, thereby
3657    reusing matrix information to achieve increased efficiency.
3658 
3659    Options Database Keys:
3660 +  -mat_no_inode  - Do not use inodes
3661 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3662 -  -mat_aij_oneindex - Internally use indexing starting at 1
3663         rather than 0.  Note that when calling MatSetValues(),
3664         the user still MUST index entries starting at 0!
3665 
3666 
3667    Example usage:
3668 
3669    Consider the following 8x8 matrix with 34 non-zero values that is
3670    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3671    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3672    as follows:
3673 
3674 .vb
3675             1  2  0  |  0  3  0  |  0  4
3676     Proc0   0  5  6  |  7  0  0  |  8  0
3677             9  0 10  | 11  0  0  | 12  0
3678     -------------------------------------
3679            13  0 14  | 15 16 17  |  0  0
3680     Proc1   0 18  0  | 19 20 21  |  0  0
3681             0  0  0  | 22 23  0  | 24  0
3682     -------------------------------------
3683     Proc2  25 26 27  |  0  0 28  | 29  0
3684            30  0  0  | 31 32 33  |  0 34
3685 .ve
3686 
3687    This can be represented as a collection of submatrices as:
3688 
3689 .vb
3690       A B C
3691       D E F
3692       G H I
3693 .ve
3694 
3695    Where the submatrices A,B,C are owned by proc0, D,E,F are
3696    owned by proc1, G,H,I are owned by proc2.
3697 
3698    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3699    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3700    The 'M','N' parameters are 8,8, and have the same values on all procs.
3701 
3702    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3703    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3704    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3705    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3706    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3707    matrix, and [DF] as another SeqAIJ matrix.
3708 
3709    When d_nz, o_nz parameters are specified, d_nz storage elements are
3710    allocated for every row of the local diagonal submatrix, and o_nz
3711    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3712    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3713    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3714    In this case, the values of d_nz,o_nz are:
3715 .vb
3716      proc0 : dnz = 2, o_nz = 2
3717      proc1 : dnz = 3, o_nz = 2
3718      proc2 : dnz = 1, o_nz = 4
3719 .ve
3720    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3721    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3722    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3723    34 values.
3724 
3725    When d_nnz, o_nnz parameters are specified, the storage is specified
3726    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3727    In the above case the values for d_nnz,o_nnz are:
3728 .vb
3729      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3730      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3731      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3732 .ve
3733    Here the space allocated is the sum of all the above values, i.e., 34, and
3734    hence pre-allocation is perfect.
3735 
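   As a rough sketch (illustrative only; each process passes its own m, n, d_nnz, and o_nnz),
   proc0 in the example above could create the matrix with:

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* proc0 values from the table above */
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
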
3736    Level: intermediate
3737 
3738 .keywords: matrix, aij, compressed row, sparse, parallel
3739 
3740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3741           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3742 @*/
3743 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3744 {
3745   PetscErrorCode ierr;
3746   PetscMPIInt    size;
3747 
3748   PetscFunctionBegin;
3749   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3750   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3751   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3752   if (size > 1) {
3753     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3754     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3755   } else {
3756     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3757     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3758   }
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 #undef __FUNCT__
3763 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3764 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3765 {
3766   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3767   PetscBool      flg;
3768   PetscErrorCode ierr;
3769 
3770   PetscFunctionBegin;
3771   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3772   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3773   if (Ad)     *Ad     = a->A;
3774   if (Ao)     *Ao     = a->B;
3775   if (colmap) *colmap = a->garray;
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 #undef __FUNCT__
3780 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3781 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3782 {
3783   PetscErrorCode ierr;
3784   PetscInt       m,N,i,rstart,nnz,Ii;
3785   PetscInt       *indx;
3786   PetscScalar    *values;
3787 
3788   PetscFunctionBegin;
3789   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3790   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3791     PetscInt       *dnz,*onz,sum,bs,cbs;
3792 
3793     if (n == PETSC_DECIDE) {
3794       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3795     }
3796     /* Check sum(n) = N */
3797     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3798     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3799 
3800     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3801     rstart -= m;
3802 
3803     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3804     for (i=0; i<m; i++) {
3805       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3806       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3807       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3808     }
3809 
3810     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3811     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3812     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3813     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3814     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3815     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3816     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3817   }
3818 
3819   /* numeric phase */
3820   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3821   for (i=0; i<m; i++) {
3822     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3823     Ii   = i + rstart;
3824     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3825     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3826   }
3827   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3828   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   PetscFunctionReturn(0);
3830 }
3831 
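/*
   MatFileSplit - Appends each process's local rows of A, as a sequential matrix, to a separate
   binary file named <outfile>.<rank>, where rank is the process rank in A's communicator.
*/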
3832 #undef __FUNCT__
3833 #define __FUNCT__ "MatFileSplit"
3834 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3835 {
3836   PetscErrorCode    ierr;
3837   PetscMPIInt       rank;
3838   PetscInt          m,N,i,rstart,nnz;
3839   size_t            len;
3840   const PetscInt    *indx;
3841   PetscViewer       out;
3842   char              *name;
3843   Mat               B;
3844   const PetscScalar *values;
3845 
3846   PetscFunctionBegin;
3847   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3848   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3849   /* Should this be the type of the diagonal block of A? */
3850   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3851   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3852   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3853   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3854   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3855   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3856   for (i=0; i<m; i++) {
3857     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3858     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3859     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3860   }
3861   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3862   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3863 
3864   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3865   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3866   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* room for "." plus up to a 10-digit rank and the null terminator */
3867   sprintf(name,"%s.%d",outfile,rank);
3868   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3869   ierr = PetscFree(name);CHKERRQ(ierr);
3870   ierr = MatView(B,out);CHKERRQ(ierr);
3871   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3872   ierr = MatDestroy(&B);CHKERRQ(ierr);
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3877 #undef __FUNCT__
3878 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3879 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3880 {
3881   PetscErrorCode      ierr;
3882   Mat_Merge_SeqsToMPI *merge;
3883   PetscContainer      container;
3884 
3885   PetscFunctionBegin;
3886   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3887   if (container) {
3888     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3889     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3890     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3891     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3892     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3893     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3901     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3902     ierr = PetscFree(merge);CHKERRQ(ierr);
3903     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3904   }
3905   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 #include <../src/mat/utils/freespace.h>
3910 #include <petscbt.h>
3911 
3912 #undef __FUNCT__
3913 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3914 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3915 {
3916   PetscErrorCode      ierr;
3917   MPI_Comm            comm;
3918   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3919   PetscMPIInt         size,rank,taga,*len_s;
3920   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3921   PetscInt            proc,m;
3922   PetscInt            **buf_ri,**buf_rj;
3923   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3924   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3925   MPI_Request         *s_waits,*r_waits;
3926   MPI_Status          *status;
3927   MatScalar           *aa=a->a;
3928   MatScalar           **abuf_r,*ba_i;
3929   Mat_Merge_SeqsToMPI *merge;
3930   PetscContainer      container;
3931 
3932   PetscFunctionBegin;
3933   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3934   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3935 
3936   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3937   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3938 
3939   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3940   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3941 
3942   bi     = merge->bi;
3943   bj     = merge->bj;
3944   buf_ri = merge->buf_ri;
3945   buf_rj = merge->buf_rj;
3946 
3947   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3948   owners = merge->rowmap->range;
3949   len_s  = merge->len_s;
3950 
3951   /* send and recv matrix values */
3952   /*-----------------------------*/
3953   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3954   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3955 
3956   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3957   for (proc=0,k=0; proc<size; proc++) {
3958     if (!len_s[proc]) continue;
3959     i    = owners[proc];
3960     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3961     k++;
3962   }
3963 
3964   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3965   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3966   ierr = PetscFree(status);CHKERRQ(ierr);
3967 
3968   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3969   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3970 
3971   /* insert mat values of mpimat */
3972   /*----------------------------*/
3973   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3974   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3975 
3976   for (k=0; k<merge->nrecv; k++) {
3977     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3978     nrows       = *(buf_ri_k[k]);
3979     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
3980     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
3981   }
3982 
3983   /* set values of ba */
3984   m = merge->rowmap->n;
3985   for (i=0; i<m; i++) {
3986     arow = owners[rank] + i;
3987     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3988     bnzi = bi[i+1] - bi[i];
3989     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3990 
3991     /* add local non-zero vals of this proc's seqmat into ba */
3992     anzi   = ai[arow+1] - ai[arow];
3993     aj     = a->j + ai[arow];
3994     aa     = a->a + ai[arow];
3995     nextaj = 0;
3996     for (j=0; nextaj<anzi; j++) {
3997       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3998         ba_i[j] += aa[nextaj++];
3999       }
4000     }
4001 
4002     /* add received vals into ba */
4003     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4004       /* i-th row */
4005       if (i == *nextrow[k]) {
4006         anzi   = *(nextai[k]+1) - *nextai[k];
4007         aj     = buf_rj[k] + *(nextai[k]);
4008         aa     = abuf_r[k] + *(nextai[k]);
4009         nextaj = 0;
4010         for (j=0; nextaj<anzi; j++) {
4011           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4012             ba_i[j] += aa[nextaj++];
4013           }
4014         }
4015         nextrow[k]++; nextai[k]++;
4016       }
4017     }
4018     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4019   }
4020   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4021   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4022 
4023   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4024   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4025   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4026   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4027   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4028   PetscFunctionReturn(0);
4029 }
4030 
4031 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4032 
4033 #undef __FUNCT__
4034 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4035 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4036 {
4037   PetscErrorCode      ierr;
4038   Mat                 B_mpi;
4039   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4040   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4041   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4042   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4043   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4044   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4045   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4046   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4047   MPI_Status          *status;
4048   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4049   PetscBT             lnkbt;
4050   Mat_Merge_SeqsToMPI *merge;
4051   PetscContainer      container;
4052 
4053   PetscFunctionBegin;
4054   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4055 
4056   /* make sure it is a PETSc comm */
4057   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4058   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4059   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4060 
4061   ierr = PetscNew(&merge);CHKERRQ(ierr);
4062   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4063 
4064   /* determine row ownership */
4065   /*---------------------------------------------------------*/
4066   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4067   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4068   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4069   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4070   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4071   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4072   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4073 
4074   m      = merge->rowmap->n;
4075   owners = merge->rowmap->range;
4076 
4077   /* determine the number of messages to send, their lengths */
4078   /*---------------------------------------------------------*/
4079   len_s = merge->len_s;
4080 
4081   len          = 0; /* length of buf_si[] */
4082   merge->nsend = 0;
4083   for (proc=0; proc<size; proc++) {
4084     len_si[proc] = 0;
4085     if (proc == rank) {
4086       len_s[proc] = 0;
4087     } else {
4088       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4089       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4090     }
4091     if (len_s[proc]) {
4092       merge->nsend++;
4093       nrows = 0;
4094       for (i=owners[proc]; i<owners[proc+1]; i++) {
4095         if (ai[i+1] > ai[i]) nrows++;
4096       }
4097       len_si[proc] = 2*(nrows+1);
4098       len         += len_si[proc];
4099     }
4100   }
4101 
4102   /* determine the number and length of messages to receive for ij-structure */
4103   /*-------------------------------------------------------------------------*/
4104   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4105   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4106 
4107   /* post the Irecv of j-structure */
4108   /*-------------------------------*/
4109   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4110   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4111 
4112   /* post the Isend of j-structure */
4113   /*--------------------------------*/
4114   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4115 
4116   for (proc=0, k=0; proc<size; proc++) {
4117     if (!len_s[proc]) continue;
4118     i    = owners[proc];
4119     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4120     k++;
4121   }
4122 
4123   /* receives and sends of j-structure are complete */
4124   /*------------------------------------------------*/
4125   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4126   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4127 
4128   /* send and recv i-structure */
4129   /*---------------------------*/
4130   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4131   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4132 
4133   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4134   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4135   for (proc=0,k=0; proc<size; proc++) {
4136     if (!len_s[proc]) continue;
4137     /* form outgoing message for i-structure:
4138          buf_si[0]:                 nrows to be sent
4139                [1:nrows]:           row index (global)
4140                [nrows+1:2*nrows+1]: i-structure index
4141     */
4142     /*-------------------------------------------*/
4143     nrows       = len_si[proc]/2 - 1;
4144     buf_si_i    = buf_si + nrows+1;
4145     buf_si[0]   = nrows;
4146     buf_si_i[0] = 0;
4147     nrows       = 0;
4148     for (i=owners[proc]; i<owners[proc+1]; i++) {
4149       anzi = ai[i+1] - ai[i];
4150       if (anzi) {
4151         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4152         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4153         nrows++;
4154       }
4155     }
4156     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4157     k++;
4158     buf_si += len_si[proc];
4159   }
4160 
4161   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4162   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4163 
4164   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4165   for (i=0; i<merge->nrecv; i++) {
4166     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4167   }
4168 
4169   ierr = PetscFree(len_si);CHKERRQ(ierr);
4170   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4171   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4172   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4173   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4174   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4175   ierr = PetscFree(status);CHKERRQ(ierr);
4176 
4177   /* compute a local seq matrix in each processor */
4178   /*----------------------------------------------*/
4179   /* allocate bi array and free space for accumulating nonzero column info */
4180   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4181   bi[0] = 0;
4182 
4183   /* create and initialize a linked list */
4184   nlnk = N+1;
4185   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4186 
4187   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4188   len  = ai[owners[rank+1]] - ai[owners[rank]];
4189   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4190 
4191   current_space = free_space;
4192 
4193   /* determine symbolic info for each local row */
4194   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4195 
4196   for (k=0; k<merge->nrecv; k++) {
4197     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4198     nrows       = *buf_ri_k[k];
4199     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4200     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4201   }
4202 
4203   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4204   len  = 0;
4205   for (i=0; i<m; i++) {
4206     bnzi = 0;
4207     /* add local non-zero cols of this proc's seqmat into lnk */
4208     arow  = owners[rank] + i;
4209     anzi  = ai[arow+1] - ai[arow];
4210     aj    = a->j + ai[arow];
4211     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4212     bnzi += nlnk;
4213     /* add received col data into lnk */
4214     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4215       if (i == *nextrow[k]) { /* i-th row */
4216         anzi  = *(nextai[k]+1) - *nextai[k];
4217         aj    = buf_rj[k] + *nextai[k];
4218         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4219         bnzi += nlnk;
4220         nextrow[k]++; nextai[k]++;
4221       }
4222     }
4223     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4224 
4225     /* if free space is not available, make more free space */
4226     if (current_space->local_remaining<bnzi) {
4227       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4228       nspacedouble++;
4229     }
4230     /* copy data into free space, then initialize lnk */
4231     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4232     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4233 
4234     current_space->array           += bnzi;
4235     current_space->local_used      += bnzi;
4236     current_space->local_remaining -= bnzi;
4237 
4238     bi[i+1] = bi[i] + bnzi;
4239   }
4240 
4241   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4242 
4243   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4244   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4245   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4246 
4247   /* create symbolic parallel matrix B_mpi */
4248   /*---------------------------------------*/
4249   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4250   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4251   if (n==PETSC_DECIDE) {
4252     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4253   } else {
4254     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4255   }
4256   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4257   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4258   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4259   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4260   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4261 
4262   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4263   B_mpi->assembled    = PETSC_FALSE;
4264   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4265   merge->bi           = bi;
4266   merge->bj           = bj;
4267   merge->buf_ri       = buf_ri;
4268   merge->buf_rj       = buf_rj;
4269   merge->coi          = NULL;
4270   merge->coj          = NULL;
4271   merge->owners_co    = NULL;
4272 
4273   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4274 
4275   /* attach the supporting struct to B_mpi for reuse */
4276   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4277   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4278   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4279   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4280   *mpimat = B_mpi;
4281 
4282   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4283   PetscFunctionReturn(0);
4284 }
4285 
4286 #undef __FUNCT__
4287 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4288 /*@C
4289       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4290                  matrices from each processor
4291 
4292     Collective on MPI_Comm
4293 
4294    Input Parameters:
4295 +    comm - the communicator the parallel matrix will live on
4296 .    seqmat - the input sequential matrix (one per process)
4297 .    m - number of local rows (or PETSC_DECIDE)
4298 .    n - number of local columns (or PETSC_DECIDE)
4299 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4300 
4301    Output Parameter:
4302 .    mpimat - the parallel matrix generated
4303 
4304     Level: advanced
4305 
4306    Notes:
4307      The dimensions of the sequential matrix in each processor MUST be the same.
4308      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4309      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
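
   Example Usage:
   A minimal sketch, assuming each process has assembled its contribution into a sequential
   matrix A_seq of the full global dimensions; the values of all the A_seq are summed:
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A_mpi);CHKERRQ(ierr);
.ve
   The second call reuses the symbolic information from the first and only updates the numerical
   values; it requires that the nonzero pattern of A_seq is unchanged.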
4310 @*/
4311 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4312 {
4313   PetscErrorCode ierr;
4314   PetscMPIInt    size;
4315 
4316   PetscFunctionBegin;
4317   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4318   if (size == 1) {
4319     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4320     if (scall == MAT_INITIAL_MATRIX) {
4321       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4322     } else {
4323       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4324     }
4325     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4326     PetscFunctionReturn(0);
4327   }
4328   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4329   if (scall == MAT_INITIAL_MATRIX) {
4330     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4331   }
4332   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4333   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4339 /*@
4340      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4341           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4342           with MatGetSize().
4343 
4344     Not Collective
4345 
4346    Input Parameters:
4347 +    A - the matrix
4348 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4349 
4350    Output Parameter:
4351 .    A_loc - the local sequential matrix generated
4352 
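   Example Usage:
   A minimal sketch; A_loc is created on the first call and only refilled on the second,
   which assumes the nonzero pattern of A has not changed:
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
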
4353     Level: developer
4354 
4355 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4356 
4357 @*/
4358 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4359 {
4360   PetscErrorCode ierr;
4361   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4362   Mat_SeqAIJ     *mat,*a,*b;
4363   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4364   MatScalar      *aa,*ba,*cam;
4365   PetscScalar    *ca;
4366   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4367   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4368   PetscBool      match;
4369   MPI_Comm       comm;
4370   PetscMPIInt    size;
4371 
4372   PetscFunctionBegin;
4373   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4374   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4375   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4376   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4377   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4378 
4379   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4380   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4381   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4382   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4383   aa = a->a; ba = b->a;
4384   if (scall == MAT_INITIAL_MATRIX) {
4385     if (size == 1) {
4386       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4387       PetscFunctionReturn(0);
4388     }
4389 
4390     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4391     ci[0] = 0;
4392     for (i=0; i<am; i++) {
4393       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4394     }
4395     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4396     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4397     k    = 0;
4398     for (i=0; i<am; i++) {
4399       ncols_o = bi[i+1] - bi[i];
4400       ncols_d = ai[i+1] - ai[i];
4401       /* off-diagonal portion of A */
4402       for (jo=0; jo<ncols_o; jo++) {
4403         col = cmap[*bj];
4404         if (col >= cstart) break;
4405         cj[k]   = col; bj++;
4406         ca[k++] = *ba++;
4407       }
4408       /* diagonal portion of A */
4409       for (j=0; j<ncols_d; j++) {
4410         cj[k]   = cstart + *aj++;
4411         ca[k++] = *aa++;
4412       }
4413       /* off-diagonal portion of A */
4414       for (j=jo; j<ncols_o; j++) {
4415         cj[k]   = cmap[*bj++];
4416         ca[k++] = *ba++;
4417       }
4418     }
4419     /* put together the new matrix */
4420     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4421     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4422     /* Since these are PETSc arrays, change flags to free them as necessary. */
4423     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4424     mat->free_a  = PETSC_TRUE;
4425     mat->free_ij = PETSC_TRUE;
4426     mat->nonew   = 0;
4427   } else if (scall == MAT_REUSE_MATRIX) {
4428     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4429     ci = mat->i; cj = mat->j; cam = mat->a;
4430     for (i=0; i<am; i++) {
4431       /* off-diagonal portion of A */
4432       ncols_o = bi[i+1] - bi[i];
4433       for (jo=0; jo<ncols_o; jo++) {
4434         col = cmap[*bj];
4435         if (col >= cstart) break;
4436         *cam++ = *ba++; bj++;
4437       }
4438       /* diagonal portion of A */
4439       ncols_d = ai[i+1] - ai[i];
4440       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4441       /* off-diagonal portion of A */
4442       for (j=jo; j<ncols_o; j++) {
4443         *cam++ = *ba++; bj++;
4444       }
4445     }
4446   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4447   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4448   PetscFunctionReturn(0);
4449 }
4450 
4451 #undef __FUNCT__
4452 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4453 /*@C
4454      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and its NON-ZERO columns
4455 
4456     Not Collective
4457 
4458    Input Parameters:
4459 +    A - the matrix
4460 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4461 -    row, col - index sets of rows and columns to extract (or NULL)
4462 
4463    Output Parameter:
4464 .    A_loc - the local sequential matrix generated
4465 
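   Example Usage:
   A minimal sketch; passing NULL for row and col selects all local rows and all nonzero columns:
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
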
4466     Level: developer
4467 
4468 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4469 
4470 @*/
4471 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4472 {
4473   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4474   PetscErrorCode ierr;
4475   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4476   IS             isrowa,iscola;
4477   Mat            *aloc;
4478   PetscBool      match;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4482   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4483   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4484   if (!row) {
4485     start = A->rmap->rstart; end = A->rmap->rend;
4486     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4487   } else {
4488     isrowa = *row;
4489   }
4490   if (!col) {
4491     start = A->cmap->rstart;
4492     cmap  = a->garray;
4493     nzA   = a->A->cmap->n;
4494     nzB   = a->B->cmap->n;
4495     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4496     ncols = 0;
4497     for (i=0; i<nzB; i++) {
4498       if (cmap[i] < start) idx[ncols++] = cmap[i];
4499       else break;
4500     }
4501     imark = i;
4502     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4503     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4504     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4505   } else {
4506     iscola = *col;
4507   }
4508   if (scall != MAT_INITIAL_MATRIX) {
4509     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4510     aloc[0] = *A_loc;
4511   }
4512   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4513   *A_loc = aloc[0];
4514   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4515   if (!row) {
4516     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4517   }
4518   if (!col) {
4519     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4520   }
4521   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4522   PetscFunctionReturn(0);
4523 }
4524 
4525 #undef __FUNCT__
4526 #define __FUNCT__ "MatGetBrowsOfAcols"
4527 /*@C
4528     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4529 
4530     Collective on Mat
4531 
4532    Input Parameters:
4533 +    A,B - the matrices in mpiaij format
4534 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4535 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4536 
4537    Output Parameters:
4538 +    rowb, colb - index sets of rows and columns of B to extract
4539 -    B_seq - the sequential matrix generated
4540 
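   Example Usage:
   A minimal sketch; rowb and colb are created by the first call and must be passed back,
   together with B_seq, when the call is repeated with MAT_REUSE_MATRIX:
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
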
4541     Level: developer
4542 
4543 @*/
4544 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4545 {
4546   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4547   PetscErrorCode ierr;
4548   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4549   IS             isrowb,iscolb;
4550   Mat            *bseq=NULL;
4551 
4552   PetscFunctionBegin;
4553   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4554     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4555   }
4556   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4557 
4558   if (scall == MAT_INITIAL_MATRIX) {
4559     start = A->cmap->rstart;
4560     cmap  = a->garray;
4561     nzA   = a->A->cmap->n;
4562     nzB   = a->B->cmap->n;
4563     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4564     ncols = 0;
4565     for (i=0; i<nzB; i++) {  /* row < local row index */
4566       if (cmap[i] < start) idx[ncols++] = cmap[i];
4567       else break;
4568     }
4569     imark = i;
4570     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4571     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4572     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4573     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4574   } else {
4575     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4576     isrowb  = *rowb; iscolb = *colb;
4577     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4578     bseq[0] = *B_seq;
4579   }
4580   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4581   *B_seq = bseq[0];
4582   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4583   if (!rowb) {
4584     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4585   } else {
4586     *rowb = isrowb;
4587   }
4588   if (!colb) {
4589     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4590   } else {
4591     *colb = iscolb;
4592   }
4593   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 #undef __FUNCT__
4598 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4599 /*
4600     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4601     of the OFF-DIAGONAL portion of the local part of A
4602 
4603     Collective on Mat
4604 
4605    Input Parameters:
4606 +    A,B - the matrices in mpiaij format
4607 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4608 
4609    Output Parameters:
4610 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4611 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4612 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4613 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4614 
4615     Level: developer
4616 
4617 */
4618 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4619 {
4620   VecScatter_MPI_General *gen_to,*gen_from;
4621   PetscErrorCode         ierr;
4622   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4623   Mat_SeqAIJ             *b_oth;
4624   VecScatter             ctx =a->Mvctx;
4625   MPI_Comm               comm;
4626   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4627   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4628   PetscScalar            *rvalues,*svalues;
4629   MatScalar              *b_otha,*bufa,*bufA;
4630   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4631   MPI_Request            *rwaits = NULL,*swaits = NULL;
4632   MPI_Status             *sstatus,rstatus;
4633   PetscMPIInt            jj,size;
4634   PetscInt               *cols,sbs,rbs;
4635   PetscScalar            *vals;
4636 
4637   PetscFunctionBegin;
4638   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4639   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4640 
4641   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4642     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4643   }
4644   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4645   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4646 
4647   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4648   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4649   rvalues  = gen_from->values; /* reused below to hold the lengths of incoming rows */
4650   svalues  = gen_to->values;   /* reused below to hold the lengths of outgoing rows */
4651   nrecvs   = gen_from->n;
4652   nsends   = gen_to->n;
4653 
4654   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4655   srow    = gen_to->indices;    /* local row index to be sent */
4656   sstarts = gen_to->starts;
4657   sprocs  = gen_to->procs;
4658   sstatus = gen_to->sstatus;
4659   sbs     = gen_to->bs;
4660   rstarts = gen_from->starts;
4661   rprocs  = gen_from->procs;
4662   rbs     = gen_from->bs;
4663 
4664   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4665   if (scall == MAT_INITIAL_MATRIX) {
4666     /* i-array */
4667     /*---------*/
4668     /*  post receives */
4669     for (i=0; i<nrecvs; i++) {
4670       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4671       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4672       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4673     }
4674 
4675     /* pack the outgoing message */
4676     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4677 
4678     sstartsj[0] = 0;
4679     rstartsj[0] = 0;
4680     len         = 0; /* total length of j or a array to be sent */
4681     k           = 0;
4682     for (i=0; i<nsends; i++) {
4683       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4684       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4685       for (j=0; j<nrows; j++) {
4686         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4687         for (l=0; l<sbs; l++) {
4688           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4689 
4690           rowlen[j*sbs+l] = ncols;
4691 
4692           len += ncols;
4693           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4694         }
4695         k++;
4696       }
4697       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4698 
4699       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4700     }
4701     /* recvs and sends of i-array are completed */
4702     i = nrecvs;
4703     while (i--) {
4704       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4705     }
4706     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4707 
4708     /* allocate buffers for sending j and a arrays */
4709     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4710     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4711 
4712     /* create i-array of B_oth */
4713     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4714 
4715     b_othi[0] = 0;
4716     len       = 0; /* total length of j or a array to be received */
4717     k         = 0;
4718     for (i=0; i<nrecvs; i++) {
4719       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4720       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4721       for (j=0; j<nrows; j++) {
4722         b_othi[k+1] = b_othi[k] + rowlen[j];
4723         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4724         k++;
4725       }
4726       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4727     }
4728 
4729     /* allocate space for j and a arrays of B_oth */
4730     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4731     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4732 
4733     /* j-array */
4734     /*---------*/
4735     /*  post receives of j-array */
4736     for (i=0; i<nrecvs; i++) {
4737       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4738       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4739     }
4740 
4741     /* pack the outgoing message j-array */
4742     k = 0;
4743     for (i=0; i<nsends; i++) {
4744       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4745       bufJ  = bufj+sstartsj[i];
4746       for (j=0; j<nrows; j++) {
4747         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4748         for (ll=0; ll<sbs; ll++) {
4749           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4750           for (l=0; l<ncols; l++) {
4751             *bufJ++ = cols[l];
4752           }
4753           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4754         }
4755       }
4756       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4757     }
4758 
4759     /* recvs and sends of j-array are completed */
4760     i = nrecvs;
4761     while (i--) {
4762       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4763     }
4764     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4765   } else if (scall == MAT_REUSE_MATRIX) {
4766     sstartsj = *startsj_s;
4767     rstartsj = *startsj_r;
4768     bufa     = *bufa_ptr;
4769     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4770     b_otha   = b_oth->a;
4771   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; use MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4772 
4773   /* a-array */
4774   /*---------*/
4775   /*  post receives of a-array */
4776   for (i=0; i<nrecvs; i++) {
4777     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4778     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4779   }
4780 
4781   /* pack the outgoing message a-array */
4782   k = 0;
4783   for (i=0; i<nsends; i++) {
4784     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4785     bufA  = bufa+sstartsj[i];
4786     for (j=0; j<nrows; j++) {
4787       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4788       for (ll=0; ll<sbs; ll++) {
4789         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4790         for (l=0; l<ncols; l++) {
4791           *bufA++ = vals[l];
4792         }
4793         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4794       }
4795     }
4796     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4797   }
4798   /* recvs and sends of a-array are completed */
4799   i = nrecvs;
4800   while (i--) {
4801     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4802   }
4803   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4804   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4805 
4806   if (scall == MAT_INITIAL_MATRIX) {
4807     /* put together the new matrix */
4808     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4809 
4810     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4811     /* Since these are PETSc arrays, change flags to free them as necessary. */
4812     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4813     b_oth->free_a  = PETSC_TRUE;
4814     b_oth->free_ij = PETSC_TRUE;
4815     b_oth->nonew   = 0;
4816 
4817     ierr = PetscFree(bufj);CHKERRQ(ierr);
4818     if (!startsj_s || !bufa_ptr) {
4819       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4820       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4821     } else {
4822       *startsj_s = sstartsj;
4823       *startsj_r = rstartsj;
4824       *bufa_ptr  = bufa;
4825     }
4826   }
4827   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4828   PetscFunctionReturn(0);
4829 }
4830 
4831 #undef __FUNCT__
4832 #define __FUNCT__ "MatGetCommunicationStructs"
4833 /*@C
4834   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4835 
4836   Not Collective
4837 
4838   Input Parameters:
4839 . A - The matrix in mpiaij format
4840 
4841   Output Parameters:
4842 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4843 . colmap - A map from global column index to local index into lvec
4844 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4845 
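  Example Usage:
  A minimal sketch; the type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE:
.vb
     Vec        lvec;
     VecScatter Mvctx;
  #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
  #else
     PetscInt   *colmap;
  #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
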
4846   Level: developer
4847 
4848 @*/
4849 #if defined(PETSC_USE_CTABLE)
4850 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4851 #else
4852 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4853 #endif
4854 {
4855   Mat_MPIAIJ *a;
4856 
4857   PetscFunctionBegin;
4858   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4859   PetscValidPointer(lvec, 2);
4860   PetscValidPointer(colmap, 3);
4861   PetscValidPointer(multScatter, 4);
4862   a = (Mat_MPIAIJ*) A->data;
4863   if (lvec) *lvec = a->lvec;
4864   if (colmap) *colmap = a->colmap;
4865   if (multScatter) *multScatter = a->Mvctx;
4866   PetscFunctionReturn(0);
4867 }
4868 
4869 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4870 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4872 #if defined(PETSC_HAVE_ELEMENTAL)
4873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4874 #endif
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4878 /*
4879     Computes C = A*B as (B'*A')', since forming the product directly is untenable
4880 
4881                n                       p                          p
4882         (              )       (              )         (                  )
4883       m (      A       )  *  n (       B      )   =   m (         C        )
4884         (              )       (              )         (                  )
4885 
4886 */
4887 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4888 {
4889   PetscErrorCode ierr;
4890   Mat            At,Bt,Ct;
4891 
4892   PetscFunctionBegin;
4893   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4894   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4895   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4896   ierr = MatDestroy(&At);CHKERRQ(ierr);
4897   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4898   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4899   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4905 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4906 {
4907   PetscErrorCode ierr;
4908   PetscInt       m=A->rmap->n,n=B->cmap->n;
4909   Mat            Cmat;
4910 
4911   PetscFunctionBegin;
4912   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4913   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4914   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4915   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4916   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4917   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4918   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4919   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4920 
4921   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4922 
4923   *C = Cmat;
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 /* ----------------------------------------------------------------*/
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4930 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4931 {
4932   PetscErrorCode ierr;
4933 
4934   PetscFunctionBegin;
4935   if (scall == MAT_INITIAL_MATRIX) {
4936     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4937     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4938     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4939   }
4940   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4941   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4942   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 /*MC
4947    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4948 
4949    Options Database Keys:
4950 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4951 
4952   Level: beginner
4953 
4954 .seealso: MatCreateAIJ()
4955 M*/
4956 
4957 #undef __FUNCT__
4958 #define __FUNCT__ "MatCreate_MPIAIJ"
4959 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4960 {
4961   Mat_MPIAIJ     *b;
4962   PetscErrorCode ierr;
4963   PetscMPIInt    size;
4964 
4965   PetscFunctionBegin;
4966   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4967 
4968   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4969   B->data       = (void*)b;
4970   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4971   B->assembled  = PETSC_FALSE;
4972   B->insertmode = NOT_SET_VALUES;
4973   b->size       = size;
4974 
4975   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4976 
4977   /* build cache for off-process entries generated by MatSetValues() */
4978   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4979 
4980   b->donotstash  = PETSC_FALSE;
4981   b->colmap      = 0;
4982   b->garray      = 0;
4983   b->roworiented = PETSC_TRUE;
4984 
4985   /* stuff used for matrix vector multiply */
4986   b->lvec  = NULL;
4987   b->Mvctx = NULL;
4988 
4989   /* stuff for MatGetRow() */
4990   b->rowindices   = 0;
4991   b->rowvalues    = 0;
4992   b->getrowactive = PETSC_FALSE;
4993 
4994   /* flexible pointer used in CUSP/CUSPARSE classes */
4995   b->spptr = NULL;
4996 
4997   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
4998   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4999   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5000   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5001   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5007 #if defined(PETSC_HAVE_ELEMENTAL)
5008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5009 #endif
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5019 /*@C
5020      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5021          and "off-diagonal" part of the matrix in CSR format.
5022 
5023    Collective on MPI_Comm
5024 
5025    Input Parameters:
5026 +  comm - MPI communicator
5027 .  m - number of local rows (Cannot be PETSC_DECIDE)
5028 .  n - This value should be the same as the local size used in creating the
5029        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5030        calculated if N is given). For square matrices n is almost always m.
5031 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5032 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5033 .   i - row indices for "diagonal" portion of matrix
5034 .   j - column indices
5035 .   a - matrix values
5036 .   oi - row indices for "off-diagonal" portion of matrix
5037 .   oj - column indices
5038 -   oa - matrix values
5039 
5040    Output Parameter:
5041 .   mat - the matrix
5042 
5043    Level: advanced
5044 
5045    Notes:
5046        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5047        must free the arrays once the matrix has been destroyed and not before.
5048 
5049        The i and j indices are 0 based
5050 
5051        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5052 
5053        This sets local rows and cannot be used to set off-processor values.
5054 
5055        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5056        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5057        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5058        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5059        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5060        communication if it is known that only local entries will be set.
5061 
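   Example Usage:
   A minimal sketch for a 2 x 2 matrix on two processes, each owning one row and one column;
   rank denotes the calling process's rank in comm. Because this routine builds the two blocks
   with MatCreateSeqAIJWithArrays(), j holds local column indices of the "diagonal" block while
   oj holds global column indices of the "off-diagonal" block:
.vb
     PetscInt    i[2]  = {0,1}, j[1]  = {0}, oi[2] = {0,1}, oj[1];
     PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
     Mat         mat;

     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
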
5062 .keywords: matrix, aij, compressed row, sparse, parallel
5063 
5064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5065           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5066 @*/
5067 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5068 {
5069   PetscErrorCode ierr;
5070   Mat_MPIAIJ     *maij;
5071 
5072   PetscFunctionBegin;
5073   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5074   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5075   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5076   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5077   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5078   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5079   maij = (Mat_MPIAIJ*) (*mat)->data;
5080 
5081   (*mat)->preallocated = PETSC_TRUE;
5082 
5083   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5084   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5085 
5086   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5087   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5088 
5089   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5090   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5091   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5092   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5093 
5094   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5095   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5096   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 /*
5101     Special version for direct calls from Fortran
5102 */
5103 #include <petsc/private/fortranimpl.h>
5104 
5105 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5106 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5107 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5108 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5109 #endif
5110 
5111 /* Redefine these macros so they can be used in a void function: errors abort via CHKERRABORT() instead of returning an error code */
5112 #undef CHKERRQ
5113 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5114 #undef SETERRQ2
5115 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5116 #undef SETERRQ3
5117 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5118 #undef SETERRQ
5119 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5120 
5121 #undef __FUNCT__
5122 #define __FUNCT__ "matsetvaluesmpiaij_"
5123 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5124 {
5125   Mat            mat  = *mmat;
5126   PetscInt       m    = *mm, n = *mn;
5127   InsertMode     addv = *maddv;
5128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5129   PetscScalar    value;
5130   PetscErrorCode ierr;
5131 
5132   MatCheckPreallocated(mat,1);
5133   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5134 
5135 #if defined(PETSC_USE_DEBUG)
5136   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5137 #endif
5138   {
5139     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5140     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5141     PetscBool roworiented = aij->roworiented;
5142 
5143     /* Variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros used below */
5144     Mat        A                 = aij->A;
5145     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5146     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5147     MatScalar  *aa               = a->a;
5148     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5149     Mat        B                 = aij->B;
5150     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5151     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5152     MatScalar  *ba               = b->a;
5153 
5154     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5155     PetscInt  nonew = a->nonew;
5156     MatScalar *ap1,*ap2;
5157 
5158     PetscFunctionBegin;
5159     for (i=0; i<m; i++) {
5160       if (im[i] < 0) continue;
5161 #if defined(PETSC_USE_DEBUG)
5162       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5163 #endif
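      /* Row im[i] is owned by this process: point rp1/ap1 and rp2/ap2 at its storage in the diagonal (A) and off-diagonal (B) blocks for the insertion macros below */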
5164       if (im[i] >= rstart && im[i] < rend) {
5165         row      = im[i] - rstart;
5166         lastcol1 = -1;
5167         rp1      = aj + ai[row];
5168         ap1      = aa + ai[row];
5169         rmax1    = aimax[row];
5170         nrow1    = ailen[row];
5171         low1     = 0;
5172         high1    = nrow1;
5173         lastcol2 = -1;
5174         rp2      = bj + bi[row];
5175         ap2      = ba + bi[row];
5176         rmax2    = bimax[row];
5177         nrow2    = bilen[row];
5178         low2     = 0;
5179         high2    = nrow2;
5180 
5181         for (j=0; j<n; j++) {
5182           if (roworiented) value = v[i*n+j];
5183           else value = v[i+j*m];
5184           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5185           if (in[j] >= cstart && in[j] < cend) {
5186             col = in[j] - cstart;
5187             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5188           } else if (in[j] < 0) continue;
5189 #if defined(PETSC_USE_DEBUG)
5190           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5191 #endif
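          /* Column lies in the off-diagonal block B: if the matrix was previously assembled, translate the global column
             index through colmap; a new nonzero location forces MatDisAssemble_MPIAIJ() so B is indexed by global columns again */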
5192           else {
5193             if (mat->was_assembled) {
5194               if (!aij->colmap) {
5195                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5196               }
5197 #if defined(PETSC_USE_CTABLE)
5198               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5199               col--;
5200 #else
5201               col = aij->colmap[in[j]] - 1;
5202 #endif
5203               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5204                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5205                 col  =  in[j];
5206                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5207                 B     = aij->B;
5208                 b     = (Mat_SeqAIJ*)B->data;
5209                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5210                 rp2   = bj + bi[row];
5211                 ap2   = ba + bi[row];
5212                 rmax2 = bimax[row];
5213                 nrow2 = bilen[row];
5214                 low2  = 0;
5215                 high2 = nrow2;
5216                 bm    = aij->B->rmap->n;
5217                 ba    = b->a;
5218               }
5219             } else col = in[j];
5220             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5221           }
5222         }
5223       } else if (!aij->donotstash) {
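        /* Row im[i] belongs to another process: stash the values; they are communicated in MatAssemblyBegin()/MatAssemblyEnd() */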
5224         if (roworiented) {
5225           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5226         } else {
5227           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5228         }
5229       }
5230     }
5231   }
5232   PetscFunctionReturnVoid();
5233 }
5234 
5235