xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fbfcfee5110779e3d6a9465ca0a2e0f9a1a6e5e3)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
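
   A minimal usage sketch (error checking omitted; the global sizes M and N and the preallocation estimates are only illustrative):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSetFromOptions(A);
   MatSeqAIJSetPreallocation(A,5,NULL);        /* used when the communicator has a single process */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL); /* used when the communicator has multiple processes */
   /* ... insert entries with MatSetValues() ... */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve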
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ format also automatically switches over to
21    using inodes when enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34    for communicators controlling multiple processes.  It is recommended that you call both of
35    the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
46 {
47   PetscErrorCode ierr;
48   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
49 
50   PetscFunctionBegin;
51   if (mat->A) {
52     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
53     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
54   }
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
59 {
60   PetscErrorCode  ierr;
61   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
62   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
63   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
64   const PetscInt  *ia,*ib;
65   const MatScalar *aa,*bb;
66   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
67   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
68 
69   PetscFunctionBegin;
70   *keptrows = 0;
71   ia        = a->i;
72   ib        = b->i;
73   for (i=0; i<m; i++) {
74     na = ia[i+1] - ia[i];
75     nb = ib[i+1] - ib[i];
76     if (!na && !nb) {
77       cnt++;
78       goto ok1;
79     }
80     aa = a->a + ia[i];
81     for (j=0; j<na; j++) {
82       if (aa[j] != 0.0) goto ok1;
83     }
84     bb = b->a + ib[i];
85     for (j=0; j <nb; j++) {
86       if (bb[j] != 0.0) goto ok1;
87     }
88     cnt++;
89 ok1:;
90   }
91   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
92   if (!n0rows) PetscFunctionReturn(0);
93   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
94   cnt  = 0;
95   for (i=0; i<m; i++) {
96     na = ia[i+1] - ia[i];
97     nb = ib[i+1] - ib[i];
98     if (!na && !nb) continue;
99     aa = a->a + ia[i];
100     for (j=0; j<na;j++) {
101       if (aa[j] != 0.0) {
102         rows[cnt++] = rstart + i;
103         goto ok2;
104       }
105     }
106     bb = b->a + ib[i];
107     for (j=0; j<nb; j++) {
108       if (bb[j] != 0.0) {
109         rows[cnt++] = rstart + i;
110         goto ok2;
111       }
112     }
113 ok2:;
114   }
115   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
116   PetscFunctionReturn(0);
117 }
118 
119 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
120 {
121   PetscErrorCode    ierr;
122   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
123 
124   PetscFunctionBegin;
125   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
126     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
127   } else {
128     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of off-diagonal entries in each row (and how many lie to the left of the diagonal block) */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of off-diagonal entries in each row (and how many lie to the left of the diagonal block) */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it, it is not scalable (each process
403 stores an order-N integer array) but is fast to access.
404 */
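/*
   A small illustrative example (values are hypothetical): if the off-diagonal block B references the
   global columns garray = {3, 17, 42}, then after MatCreateColmap_MPIAIJ_Private() the lookup gives
   colmap[3] = 1, colmap[17] = 2, colmap[42] = 3, and 0 for any global column not present locally,
   so the local column index is colmap[global] - 1.  With PETSC_USE_CTABLE the same mapping is kept
   in a hash table keyed on garray[i]+1 rather than in a dense array of length N.
*/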
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
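/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B) sequential block:
   they narrow the search window with a short binary search over the sorted column indices of the row,
   add to or overwrite an existing entry if the column is already present, and otherwise shift the later
   entries of the row up by one to make room for the new nonzero (reallocating the storage with
   MatSeqXAIJReallocateAIJ() when the row is full), honoring the ignorezeroentries and nonew options.
*/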
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 
462 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
463   { \
464     if (col <= lastcol2) low2 = 0;                        \
465     else high2 = nrow2;                                   \
466     lastcol2 = col;                                       \
467     while (high2-low2 > 5) {                              \
468       t = (low2+high2)/2;                                 \
469       if (rp2[t] > col) high2 = t;                        \
470       else             low2  = t;                         \
471     }                                                     \
472     for (_i=low2; _i<high2; _i++) {                       \
473       if (rp2[_i] > col) break;                           \
474       if (rp2[_i] == col) {                               \
475         if (addv == ADD_VALUES) ap2[_i] += value;         \
476         else                    ap2[_i] = value;          \
477         goto b_noinsert;                                  \
478       }                                                   \
479     }                                                     \
480     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
481     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
482     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
483     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
484     N = nrow2++ - 1; b->nz++; high2++;                    \
485     /* shift up all the later entries in this row */      \
486     for (ii=N; ii>=_i; ii--) {                            \
487       rp2[ii+1] = rp2[ii];                                \
488       ap2[ii+1] = ap2[ii];                                \
489     }                                                     \
490     rp2[_i] = col;                                        \
491     ap2[_i] = value;                                      \
492     B->nonzerostate++;                                    \
493     b_noinsert: ;                                         \
494     bilen[row] = nrow2;                                   \
495   }
496 
497 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
498 {
499   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
500   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
501   PetscErrorCode ierr;
502   PetscInt       l,*garray = mat->garray,diag;
503 
504   PetscFunctionBegin;
505   /* code only works for square matrices A */
506 
507   /* find size of row to the left of the diagonal part */
508   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
509   row  = row - diag;
510   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
511     if (garray[b->j[b->i[row]+l]] > diag) break;
512   }
513   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
514 
515   /* diagonal part */
516   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* right of diagonal part */
519   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
524 {
525   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
526   PetscScalar    value;
527   PetscErrorCode ierr;
528   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
529   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
530   PetscBool      roworiented = aij->roworiented;
531 
532   /* Some Variables required in the macro */
533   Mat        A                 = aij->A;
534   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
535   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
536   MatScalar  *aa               = a->a;
537   PetscBool  ignorezeroentries = a->ignorezeroentries;
538   Mat        B                 = aij->B;
539   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
540   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
541   MatScalar  *ba               = b->a;
542 
543   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
544   PetscInt  nonew;
545   MatScalar *ap1,*ap2;
546 
547   PetscFunctionBegin;
548   for (i=0; i<m; i++) {
549     if (im[i] < 0) continue;
550 #if defined(PETSC_USE_DEBUG)
551     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
552 #endif
553     if (im[i] >= rstart && im[i] < rend) {
554       row      = im[i] - rstart;
555       lastcol1 = -1;
556       rp1      = aj + ai[row];
557       ap1      = aa + ai[row];
558       rmax1    = aimax[row];
559       nrow1    = ailen[row];
560       low1     = 0;
561       high1    = nrow1;
562       lastcol2 = -1;
563       rp2      = bj + bi[row];
564       ap2      = ba + bi[row];
565       rmax2    = bimax[row];
566       nrow2    = bilen[row];
567       low2     = 0;
568       high2    = nrow2;
569 
570       for (j=0; j<n; j++) {
571         if (roworiented) value = v[i*n+j];
572         else             value = v[i+j*m];
573         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
574         if (in[j] >= cstart && in[j] < cend) {
575           col   = in[j] - cstart;
576           nonew = a->nonew;
577           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
578         } else if (in[j] < 0) continue;
579 #if defined(PETSC_USE_DEBUG)
580         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
581 #endif
582         else {
583           if (mat->was_assembled) {
584             if (!aij->colmap) {
585               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
586             }
587 #if defined(PETSC_USE_CTABLE)
588             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
589             col--;
590 #else
591             col = aij->colmap[in[j]] - 1;
592 #endif
593             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
594               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
595               col  =  in[j];
596               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
597               B     = aij->B;
598               b     = (Mat_SeqAIJ*)B->data;
599               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
600               rp2   = bj + bi[row];
601               ap2   = ba + bi[row];
602               rmax2 = bimax[row];
603               nrow2 = bilen[row];
604               low2  = 0;
605               high2 = nrow2;
606               bm    = aij->B->rmap->n;
607               ba    = b->a;
608             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
609           } else col = in[j];
610           nonew = b->nonew;
611           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!aij->donotstash) {
617         mat->assembled = PETSC_FALSE;
618         if (roworiented) {
619           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
620         } else {
621           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
622         }
623       }
624     }
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
634   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
635 
636   PetscFunctionBegin;
637   for (i=0; i<m; i++) {
638     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
639     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
640     if (idxm[i] >= rstart && idxm[i] < rend) {
641       row = idxm[i] - rstart;
642       for (j=0; j<n; j++) {
643         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
644         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
645         if (idxn[j] >= cstart && idxn[j] < cend) {
646           col  = idxn[j] - cstart;
647           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
648         } else {
649           if (!aij->colmap) {
650             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
651           }
652 #if defined(PETSC_USE_CTABLE)
653           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
654           col--;
655 #else
656           col = aij->colmap[idxn[j]] - 1;
657 #endif
658           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
659           else {
660             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
670 
671 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
672 {
673   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
674   PetscErrorCode ierr;
675   PetscInt       nstash,reallocs;
676 
677   PetscFunctionBegin;
678   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
679 
680   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
687 {
688   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
689   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
690   PetscErrorCode ierr;
691   PetscMPIInt    n;
692   PetscInt       i,j,rstart,ncols,flg;
693   PetscInt       *row,*col;
694   PetscBool      other_disassembled;
695   PetscScalar    *val;
696 
697   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
698 
699   PetscFunctionBegin;
700   if (!aij->donotstash && !mat->nooffprocentries) {
701     while (1) {
702       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
703       if (!flg) break;
704 
705       for (i=0; i<n; ) {
706         /* Now identify the consecutive vals belonging to the same row */
707         for (j=i,rstart=row[j]; j<n; j++) {
708           if (row[j] != rstart) break;
709         }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
714 
715         i = j;
716       }
717     }
718     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
719   }
720   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
721   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
722 
723   /* determine if any processor has disassembled, if so we must
724      also disassemble ourselves, in order that we may reassemble. */
725   /*
726      if nonzero structure of submatrix B cannot change then we know that
727      no processor disassembled thus we can skip this stuff
728   */
729   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
730     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
731     if (mat->was_assembled && !other_disassembled) {
732       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
733     }
734   }
735   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
736     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
737   }
738   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
739   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
741 
742   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
743 
744   aij->rowvalues = 0;
745 
746   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
747   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
748 
749   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
750   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
751     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
752     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
758 {
759   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
760   PetscErrorCode ierr;
761 
762   PetscFunctionBegin;
763   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
764   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
769 {
770   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
771   PetscInt      *lrows;
772   PetscInt       r, len;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   /* get locally owned rows */
777   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
778   /* fix right hand side if needed */
779   if (x && b) {
780     const PetscScalar *xx;
781     PetscScalar       *bb;
782 
783     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
784     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
785     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
786     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
788   }
789   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
790   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
791   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
792     PetscBool cong;
793     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
794     if (cong) A->congruentlayouts = 1;
795     else      A->congruentlayouts = 0;
796   }
797   if ((diag != 0.0) && A->congruentlayouts) {
798     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
799   } else if (diag != 0.0) {
800     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
801     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
802     for (r = 0; r < len; ++r) {
803       const PetscInt row = lrows[r] + A->rmap->rstart;
804       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
805     }
806     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808   } else {
809     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
810   }
811   ierr = PetscFree(lrows);CHKERRQ(ierr);
812 
813   /* only change matrix nonzero state if pattern was allowed to be changed */
814   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
815     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
816     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817   }
818   PetscFunctionReturn(0);
819 }
820 
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
935 {
936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
937   PetscErrorCode ierr;
938   PetscInt       nt;
939 
940   PetscFunctionBegin;
941   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
942   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
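  /* start scattering the off-process part of xx, overlap it with the local (diagonal block) product,
     then finish the scatter and apply the off-diagonal block to the gathered values */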
943   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
951 {
952   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
953   PetscErrorCode ierr;
954 
955   PetscFunctionBegin;
956   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
961 {
962   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
968   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
974 {
975   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
976   PetscErrorCode ierr;
977   PetscBool      merged;
978 
979   PetscFunctionBegin;
980   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
981   /* do nondiagonal part */
982   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
983   if (!merged) {
984     /* send it on its way */
985     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
986     /* do local part */
987     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
988     /* receive remote parts: note this assumes the values are not actually */
989     /* added into yy until the VecScatterEnd() on the next line */
990     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
991   } else {
992     /* do local part */
993     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
994     /* send it on its way */
995     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
996     /* values actually were received in the Begin() but we need to call this nop */
997     /* the values were actually received in the Begin() but we still need to call this no-op End() */
998   }
999   PetscFunctionReturn(0);
1000 }
1001 
1002 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1003 {
1004   MPI_Comm       comm;
1005   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1006   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1007   IS             Me,Notme;
1008   PetscErrorCode ierr;
1009   PetscInt       M,N,first,last,*notme,i;
1010   PetscMPIInt    size;
1011 
1012   PetscFunctionBegin;
1013   /* Easy test: symmetric diagonal block */
1014   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1015   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1016   if (!*f) PetscFunctionReturn(0);
1017   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1018   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1019   if (size == 1) PetscFunctionReturn(0);
1020 
1021   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
1022   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1023   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1024   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1025   for (i=0; i<first; i++) notme[i] = i;
1026   for (i=last; i<M; i++) notme[i-last+first] = i;
1027   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1028   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1029   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1030   Aoff = Aoffs[0];
1031   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1032   Boff = Boffs[0];
1033   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1035   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1037   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1038   ierr = PetscFree(notme);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   /* do nondiagonal part */
1049   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1050   /* send it on its way */
1051   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052   /* do local part */
1053   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1054   /* receive remote parts */
1055   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 /*
1060   This only works correctly for square matrices where the subblock A->A is the
1061    diagonal block
1062 */
1063 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1064 {
1065   PetscErrorCode ierr;
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067 
1068   PetscFunctionBegin;
1069   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1070   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1071   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1082   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1087 {
1088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092 #if defined(PETSC_USE_LOG)
1093   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1094 #endif
1095   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1096   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1098   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1099 #if defined(PETSC_USE_CTABLE)
1100   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1101 #else
1102   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1103 #endif
1104   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1105   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1106   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1107   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1108   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1109   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1110 
1111   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1118   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1119 #if defined(PETSC_HAVE_ELEMENTAL)
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1121 #endif
1122 #if defined(PETSC_HAVE_HYPRE)
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1125 #endif
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1130 {
1131   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1132   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1133   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1134   PetscErrorCode ierr;
1135   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1136   int            fd;
1137   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1138   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1139   PetscScalar    *column_values;
1140   PetscInt       message_count,flowcontrolcount;
1141   FILE           *file;
1142 
1143   PetscFunctionBegin;
1144   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1145   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1146   nz   = A->nz + B->nz;
1147   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1148   if (!rank) {
1149     header[0] = MAT_FILE_CLASSID;
1150     header[1] = mat->rmap->N;
1151     header[2] = mat->cmap->N;
1152 
1153     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1154     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1155     /* get largest number of rows any processor has */
1156     rlen  = mat->rmap->n;
1157     range = mat->rmap->range;
1158     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1159   } else {
1160     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     rlen = mat->rmap->n;
1162   }
1163 
1164   /* load up the local row counts */
1165   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1166   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1167 
1168   /* store the row lengths to the file */
1169   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1170   if (!rank) {
1171     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     for (i=1; i<size; i++) {
1173       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1174       rlen = range[i+1] - range[i];
1175       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1177     }
1178     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1179   } else {
1180     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1181     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1182     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1183   }
1184   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1185 
1186   /* load up the local column indices */
1187   nzmax = nz; /* process 0 needs enough space to hold the nonzeros of the largest process */
1188   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1189   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1190   cnt   = 0;
1191   for (i=0; i<mat->rmap->n; i++) {
1192     for (j=B->i[i]; j<B->i[i+1]; j++) {
1193       if ((col = garray[B->j[j]]) > cstart) break;
1194       column_indices[cnt++] = col;
1195     }
1196     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1197     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1198   }
1199   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1200 
1201   /* store the column indices to the file */
1202   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1203   if (!rank) {
1204     MPI_Status status;
1205     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     for (i=1; i<size; i++) {
1207       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1208       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1209       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1210       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1212     }
1213     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1214   } else {
1215     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1216     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1219   }
1220   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1221 
1222   /* load up the local column values */
1223   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1224   cnt  = 0;
1225   for (i=0; i<mat->rmap->n; i++) {
1226     for (j=B->i[i]; j<B->i[i+1]; j++) {
1227       if (garray[B->j[j]] > cstart) break;
1228       column_values[cnt++] = B->a[j];
1229     }
1230     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1231     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1232   }
1233   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1234 
1235   /* store the column values to the file */
1236   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1237   if (!rank) {
1238     MPI_Status status;
1239     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1240     for (i=1; i<size; i++) {
1241       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1242       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1243       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1244       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1250     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(column_values);CHKERRQ(ierr);
1255 
1256   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1257   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #include <petscdraw.h>
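/*
   Viewer dispatch for the ASCII, binary, draw, and socket viewers.  The ASCII info formats print
   only summary information and parallel binary output goes through MatView_MPIAIJ_Binary();
   in all remaining cases the entire matrix is assembled onto the first process and viewed there.
*/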
1262 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1265   PetscErrorCode    ierr;
1266   PetscMPIInt       rank = aij->rank,size = aij->size;
1267   PetscBool         isdraw,iascii,isbinary;
1268   PetscViewer       sviewer;
1269   PetscViewerFormat format;
1270 
1271   PetscFunctionBegin;
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1274   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1275   if (iascii) {
1276     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1277     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1278       MatInfo   info;
1279       PetscBool inodes;
1280 
1281       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1282       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1283       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1284       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1285       if (!inodes) {
1286         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1287                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1288       } else {
1289         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1290                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1291       }
1292       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1294       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1295       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1296       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1298       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1299       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1300       PetscFunctionReturn(0);
1301     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1302       PetscInt inodecount,inodelimit,*inodes;
1303       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1304       if (inodes) {
1305         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1306       } else {
1307         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1308       }
1309       PetscFunctionReturn(0);
1310     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1311       PetscFunctionReturn(0);
1312     }
1313   } else if (isbinary) {
1314     if (size == 1) {
1315       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1316       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1317     } else {
1318       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1319     }
1320     PetscFunctionReturn(0);
1321   } else if (isdraw) {
1322     PetscDraw draw;
1323     PetscBool isnull;
1324     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1325     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1326     if (isnull) PetscFunctionReturn(0);
1327   }
1328 
1329   {
1330     /* assemble the entire matrix onto first processor. */
1331     Mat        A;
1332     Mat_SeqAIJ *Aloc;
1333     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1334     MatScalar  *a;
1335 
1336     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1337     if (!rank) {
1338       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1339     } else {
1340       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1341     }
1342     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1343     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1344     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1345     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1346     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1347 
1348     /* copy over the A part */
1349     Aloc = (Mat_SeqAIJ*)aij->A->data;
1350     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1351     row  = mat->rmap->rstart;
1352     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1353     for (i=0; i<m; i++) {
1354       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1355       row++;
1356       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1357     }
1358     aj = Aloc->j;
1359     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1360 
1361     /* copy over the B part */
1362     Aloc = (Mat_SeqAIJ*)aij->B->data;
1363     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1364     row  = mat->rmap->rstart;
1365     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1366     ct   = cols;
1367     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1368     for (i=0; i<m; i++) {
1369       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1370       row++;
1371       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1372     }
1373     ierr = PetscFree(ct);CHKERRQ(ierr);
1374     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376     /*
1377        Everyone has to participate in this call to draw the matrix, since the graphics waits are
1378        synchronized across all processes that share the PetscDraw object
1379     */
1380     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1381     if (!rank) {
1382       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1384     }
1385     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1386     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1387     ierr = MatDestroy(&A);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1393 {
1394   PetscErrorCode ierr;
1395   PetscBool      iascii,isdraw,issocket,isbinary;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1402   if (iascii || isdraw || isbinary || issocket) {
1403     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
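/*
   SOR is applied only to the on-process diagonal block mat->A.  For the local sweeps each
   iteration scatters xx into the ghosted vector mat->lvec, forms bb1 = bb - B*lvec so the
   off-process coupling is moved to the right-hand side, and then runs the sequential SOR
   kernel on mat->A.  Eisenstat's trick is handled separately; a true parallel SOR is not supported.
*/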
1408 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1409 {
1410   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1411   PetscErrorCode ierr;
1412   Vec            bb1 = 0;
1413   PetscBool      hasop;
1414 
1415   PetscFunctionBegin;
1416   if (flag == SOR_APPLY_UPPER) {
1417     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1418     PetscFunctionReturn(0);
1419   }
1420 
1421   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1422     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1423   }
1424 
1425   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1426     if (flag & SOR_ZERO_INITIAL_GUESS) {
1427       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1428       its--;
1429     }
1430 
1431     while (its--) {
1432       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1433       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1434 
1435       /* update rhs: bb1 = bb - B*x */
1436       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1437       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1438 
1439       /* local sweep */
1440       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1441     }
1442   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1443     if (flag & SOR_ZERO_INITIAL_GUESS) {
1444       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1445       its--;
1446     }
1447     while (its--) {
1448       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1453       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1454 
1455       /* local sweep */
1456       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1457     }
1458   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463     while (its--) {
1464       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1469       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1470 
1471       /* local sweep */
1472       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1473     }
1474   } else if (flag & SOR_EISENSTAT) {
1475     Vec xx1;
1476 
1477     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1478     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1479 
1480     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482     if (!mat->diag) {
1483       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1484       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1485     }
1486     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1487     if (hasop) {
1488       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1489     } else {
1490       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1491     }
1492     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1493 
1494     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1495 
1496     /* local sweep */
1497     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1498     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1499     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1500   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1501 
1502   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1503 
1504   matin->factorerrortype = mat->A->factorerrortype;
1505   PetscFunctionReturn(0);
1506 }
1507 
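/*
   Permutes rows and columns of a parallel AIJ matrix.  PetscSF reductions invert the row and
   column permutations so each process learns the new global index of its rows, columns, and
   ghost columns; the permuted nonzero counts are then broadcast for preallocation and the
   entries are inserted into the new matrix with MatSetValues().
*/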
1508 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1509 {
1510   Mat            aA,aB,Aperm;
1511   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1512   PetscScalar    *aa,*ba;
1513   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1514   PetscSF        rowsf,sf;
1515   IS             parcolp = NULL;
1516   PetscBool      done;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1521   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1522   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1523   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1524 
1525   /* Invert row permutation to find out where my rows should go */
1526   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1527   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1528   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1529   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1530   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1531   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1532 
1533   /* Invert column permutation to find out where my columns should go */
1534   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1535   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1536   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1537   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1538   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1540   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1541 
1542   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1543   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1544   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1545 
1546   /* Find out where my gcols should go */
1547   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1548   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1549   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1550   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1551   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1552   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1553   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1554   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1555 
1556   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1557   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1558   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1559   for (i=0; i<m; i++) {
1560     PetscInt row = rdest[i],rowner;
1561     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1562     for (j=ai[i]; j<ai[i+1]; j++) {
1563       PetscInt cowner,col = cdest[aj[j]];
1564       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1565       if (rowner == cowner) dnnz[i]++;
1566       else onnz[i]++;
1567     }
1568     for (j=bi[i]; j<bi[i+1]; j++) {
1569       PetscInt cowner,col = gcdest[bj[j]];
1570       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1571       if (rowner == cowner) dnnz[i]++;
1572       else onnz[i]++;
1573     }
1574   }
1575   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1576   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1578   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1579   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1580 
1581   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1582   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1583   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1584   for (i=0; i<m; i++) {
1585     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1586     PetscInt j0,rowlen;
1587     rowlen = ai[i+1] - ai[i];
1588     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1589       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1590       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1591     }
1592     rowlen = bi[i+1] - bi[i];
1593     for (j0=j=0; j<rowlen; j0=j) {
1594       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1595       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1596     }
1597   }
1598   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1600   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1601   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1602   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1603   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1604   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1605   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1606   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1607   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1608   *B = Aperm;
1609   PetscFunctionReturn(0);
1610 }
1611 
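/* Returns the number of ghost (off-process) columns of the off-diagonal block and, optionally, their global indices (garray) */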
1612 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1613 {
1614   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1619   if (ghosts) *ghosts = aij->garray;
1620   PetscFunctionReturn(0);
1621 }
1622 
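/*
   Sums the MatInfo statistics of the diagonal (A) and off-diagonal (B) blocks; for MAT_GLOBAL_MAX
   and MAT_GLOBAL_SUM the per-process totals are reduced over the matrix communicator.
*/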
1623 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1624 {
1625   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1626   Mat            A    = mat->A,B = mat->B;
1627   PetscErrorCode ierr;
1628   PetscReal      isend[5],irecv[5];
1629 
1630   PetscFunctionBegin;
1631   info->block_size = 1.0;
1632   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1633 
1634   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1635   isend[3] = info->memory;  isend[4] = info->mallocs;
1636 
1637   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1638 
1639   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1640   isend[3] += info->memory;  isend[4] += info->mallocs;
1641   if (flag == MAT_LOCAL) {
1642     info->nz_used      = isend[0];
1643     info->nz_allocated = isend[1];
1644     info->nz_unneeded  = isend[2];
1645     info->memory       = isend[3];
1646     info->mallocs      = isend[4];
1647   } else if (flag == MAT_GLOBAL_MAX) {
1648     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   } else if (flag == MAT_GLOBAL_SUM) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   }
1664   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1665   info->fill_ratio_needed = 0;
1666   info->factor_mallocs    = 0;
1667   PetscFunctionReturn(0);
1668 }
1669 
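/*
   Most options are forwarded to both the diagonal and off-diagonal blocks; the symmetry-related
   options are applied to the diagonal block only, and a few flags are recorded on the parallel
   matrix itself.
*/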
1670 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1671 {
1672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   switch (op) {
1677   case MAT_NEW_NONZERO_LOCATIONS:
1678   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1679   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1680   case MAT_KEEP_NONZERO_PATTERN:
1681   case MAT_NEW_NONZERO_LOCATION_ERR:
1682   case MAT_USE_INODES:
1683   case MAT_IGNORE_ZERO_ENTRIES:
1684     MatCheckPreallocated(A,1);
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1687     break;
1688   case MAT_ROW_ORIENTED:
1689     MatCheckPreallocated(A,1);
1690     a->roworiented = flg;
1691 
1692     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1693     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1694     break;
1695   case MAT_NEW_DIAGONALS:
1696     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1697     break;
1698   case MAT_IGNORE_OFF_PROC_ENTRIES:
1699     a->donotstash = flg;
1700     break;
1701   case MAT_SPD:
1702     A->spd_set = PETSC_TRUE;
1703     A->spd     = flg;
1704     if (flg) {
1705       A->symmetric                  = PETSC_TRUE;
1706       A->structurally_symmetric     = PETSC_TRUE;
1707       A->symmetric_set              = PETSC_TRUE;
1708       A->structurally_symmetric_set = PETSC_TRUE;
1709     }
1710     break;
1711   case MAT_SYMMETRIC:
1712     MatCheckPreallocated(A,1);
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     break;
1715   case MAT_STRUCTURALLY_SYMMETRIC:
1716     MatCheckPreallocated(A,1);
1717     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1718     break;
1719   case MAT_HERMITIAN:
1720     MatCheckPreallocated(A,1);
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     break;
1723   case MAT_SYMMETRY_ETERNAL:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_SUBMAT_SINGLEIS:
1728     A->submat_singleis = flg;
1729     break;
1730   default:
1731     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1732   }
1733   PetscFunctionReturn(0);
1734 }
1735 
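/*
   Returns a locally owned row with global column indices.  The diagonal (A) and off-diagonal (B)
   contributions are merged so the columns appear in increasing global order; work arrays sized
   for the longest local row are allocated on first use and reused by later calls.
*/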
1736 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1737 {
1738   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1739   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1740   PetscErrorCode ierr;
1741   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1742   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1743   PetscInt       *cmap,*idx_p;
1744 
1745   PetscFunctionBegin;
1746   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1747   mat->getrowactive = PETSC_TRUE;
1748 
1749   if (!mat->rowvalues && (idx || v)) {
1750     /*
1751         allocate enough space to hold information from the longest row.
1752     */
1753     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1754     PetscInt   max = 1,tmp;
1755     for (i=0; i<matin->rmap->n; i++) {
1756       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1757       if (max < tmp) max = tmp;
1758     }
1759     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1760   }
1761 
1762   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1763   lrow = row - rstart;
1764 
1765   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1766   if (!v)   {pvA = 0; pvB = 0;}
1767   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1768   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1769   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1770   nztot = nzA + nzB;
1771 
1772   cmap = mat->garray;
1773   if (v  || idx) {
1774     if (nztot) {
1775       /* Sort by increasing column numbers, assuming A and B already sorted */
1776       PetscInt imark = -1;
1777       if (v) {
1778         *v = v_p = mat->rowvalues;
1779         for (i=0; i<nzB; i++) {
1780           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1781           else break;
1782         }
1783         imark = i;
1784         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1785         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1786       }
1787       if (idx) {
1788         *idx = idx_p = mat->rowindices;
1789         if (imark > -1) {
1790           for (i=0; i<imark; i++) {
1791             idx_p[i] = cmap[cworkB[i]];
1792           }
1793         } else {
1794           for (i=0; i<nzB; i++) {
1795             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1796             else break;
1797           }
1798           imark = i;
1799         }
1800         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1801         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1802       }
1803     } else {
1804       if (idx) *idx = 0;
1805       if (v)   *v   = 0;
1806     }
1807   }
1808   *nz  = nztot;
1809   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1810   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1817 
1818   PetscFunctionBegin;
1819   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
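/*
   NORM_FROBENIUS, NORM_1 (max column sum), and NORM_INFINITY (max row sum) are computed from the
   local A and B blocks and reduced over the matrix communicator; the 2-norm is not supported.
*/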
1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1825 {
1826   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1827   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1828   PetscErrorCode ierr;
1829   PetscInt       i,j,cstart = mat->cmap->rstart;
1830   PetscReal      sum = 0.0;
1831   MatScalar      *v;
1832 
1833   PetscFunctionBegin;
1834   if (aij->size == 1) {
1835     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1836   } else {
1837     if (type == NORM_FROBENIUS) {
1838       v = amat->a;
1839       for (i=0; i<amat->nz; i++) {
1840         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1841       }
1842       v = bmat->a;
1843       for (i=0; i<bmat->nz; i++) {
1844         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1845       }
1846       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1847       *norm = PetscSqrtReal(*norm);
1848       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1849     } else if (type == NORM_1) { /* max column norm */
1850       PetscReal *tmp,*tmp2;
1851       PetscInt  *jj,*garray = aij->garray;
1852       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1853       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1854       *norm = 0.0;
1855       v     = amat->a; jj = amat->j;
1856       for (j=0; j<amat->nz; j++) {
1857         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1858       }
1859       v = bmat->a; jj = bmat->j;
1860       for (j=0; j<bmat->nz; j++) {
1861         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1862       }
1863       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1864       for (j=0; j<mat->cmap->N; j++) {
1865         if (tmp2[j] > *norm) *norm = tmp2[j];
1866       }
1867       ierr = PetscFree(tmp);CHKERRQ(ierr);
1868       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1869       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1870     } else if (type == NORM_INFINITY) { /* max row norm */
1871       PetscReal ntemp = 0.0;
1872       for (j=0; j<aij->A->rmap->n; j++) {
1873         v   = amat->a + amat->i[j];
1874         sum = 0.0;
1875         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1876           sum += PetscAbsScalar(*v); v++;
1877         }
1878         v = bmat->a + bmat->i[j];
1879         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1880           sum += PetscAbsScalar(*v); v++;
1881         }
1882         if (sum > ntemp) ntemp = sum;
1883       }
1884       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1885       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1886     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1887   }
1888   PetscFunctionReturn(0);
1889 }
1890 
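/*
   Forms the explicit transpose.  For a new matrix the per-column nonzero counts are computed
   (a PetscSF reduction maps the off-diagonal counts to the owning processes) to preallocate B,
   and each local row of A is then inserted as a column of B with MatSetValues().
*/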
1891 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1892 {
1893   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1894   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1895   PetscErrorCode ierr;
1896   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1897   PetscInt       cstart = A->cmap->rstart,ncol;
1898   Mat            B;
1899   MatScalar      *array;
1900 
1901   PetscFunctionBegin;
1902   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1903 
1904   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1905   ai = Aloc->i; aj = Aloc->j;
1906   bi = Bloc->i; bj = Bloc->j;
1907   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1908     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1909     PetscSFNode          *oloc;
1910     PETSC_UNUSED PetscSF sf;
1911 
1912     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1913     /* compute d_nnz for preallocation */
1914     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1915     for (i=0; i<ai[ma]; i++) {
1916       d_nnz[aj[i]]++;
1917       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1918     }
1919     /* compute local off-diagonal contributions */
1920     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1921     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1922     /* map those to global */
1923     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1924     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1925     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1926     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1927     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1928     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1929     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1930 
1931     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1932     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1933     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1934     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1935     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1936     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1937   } else {
1938     B    = *matout;
1939     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1940     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1941   }
1942 
1943   /* copy over the A part */
1944   array = Aloc->a;
1945   row   = A->rmap->rstart;
1946   for (i=0; i<ma; i++) {
1947     ncol = ai[i+1]-ai[i];
1948     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1949     row++;
1950     array += ncol; aj += ncol;
1951   }
1952   aj = Aloc->j;
1953   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1954 
1955   /* copy over the B part */
1956   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1957   array = Bloc->a;
1958   row   = A->rmap->rstart;
1959   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1960   cols_tmp = cols;
1961   for (i=0; i<mb; i++) {
1962     ncol = bi[i+1]-bi[i];
1963     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1964     row++;
1965     array += ncol; cols_tmp += ncol;
1966   }
1967   ierr = PetscFree(cols);CHKERRQ(ierr);
1968 
1969   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1970   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1971   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1972     *matout = B;
1973   } else {
1974     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1975   }
1976   PetscFunctionReturn(0);
1977 }
1978 
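/*
   Scales rows by ll and columns by rr.  The scatter of rr into the ghosted vector lvec is
   overlapped with scaling of the diagonal block; once the scatter completes the off-diagonal
   block is column-scaled with the ghost values.
*/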
1979 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1980 {
1981   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1982   Mat            a    = aij->A,b = aij->B;
1983   PetscErrorCode ierr;
1984   PetscInt       s1,s2,s3;
1985 
1986   PetscFunctionBegin;
1987   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1988   if (rr) {
1989     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1990     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1991     /* Overlap communication with computation. */
1992     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1993   }
1994   if (ll) {
1995     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1996     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1997     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1998   }
1999   /* scale the diagonal block */
2000   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2001 
2002   if (rr) {
2003     /* Do a scatter end and then right scale the off-diagonal block */
2004     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2005     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2006   }
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2011 {
2012   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2013   PetscErrorCode ierr;
2014 
2015   PetscFunctionBegin;
2016   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2021 {
2022   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2023   Mat            a,b,c,d;
2024   PetscBool      flg;
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   a = matA->A; b = matA->B;
2029   c = matB->A; d = matB->B;
2030 
2031   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2032   if (flg) {
2033     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2034   }
2035   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2040 {
2041   PetscErrorCode ierr;
2042   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2043   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2044 
2045   PetscFunctionBegin;
2046   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2047   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2048     /* Because of the column compression in the off-process part of the matrix a->B,
2049        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2050        directly on the two parts.  If need be, a copy more efficient than MatCopy_Basic()
2051        could be provided by first uncompressing the a->B matrices and then copying
2052        the submatrices */
2053     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2054   } else {
2055     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2056     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2057   }
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2062 {
2063   PetscErrorCode ierr;
2064 
2065   PetscFunctionBegin;
2066   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 /*
2071    Computes the number of nonzeros per row needed for preallocation when X and Y have different
2072    nonzero structure: for each row the two column lists, sorted by global index, are merged and counted.
2073 */
2074 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2075 {
2076   PetscInt       i,j,k,nzx,nzy;
2077 
2078   PetscFunctionBegin;
2079   /* Set the number of nonzeros in the new matrix */
2080   for (i=0; i<m; i++) {
2081     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2082     nzx = xi[i+1] - xi[i];
2083     nzy = yi[i+1] - yi[i];
2084     nnz[i] = 0;
2085     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2086       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2087       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2088       nnz[i]++;
2089     }
2090     for (; k<nzy; k++) nnz[i]++;
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2096 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2097 {
2098   PetscErrorCode ierr;
2099   PetscInt       m = Y->rmap->N;
2100   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2101   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2102 
2103   PetscFunctionBegin;
2104   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
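/*
   Y = Y + a*X.  With identical nonzero patterns the value arrays of the A and B blocks are combined
   directly with BLAS axpy; a subset pattern falls back to MatAXPY_Basic(), and for unrelated patterns
   a new matrix with the merged pattern is preallocated and swapped into Y with MatHeaderReplace().
*/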
2108 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2109 {
2110   PetscErrorCode ierr;
2111   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2112   PetscBLASInt   bnz,one=1;
2113   Mat_SeqAIJ     *x,*y;
2114 
2115   PetscFunctionBegin;
2116   if (str == SAME_NONZERO_PATTERN) {
2117     PetscScalar alpha = a;
2118     x    = (Mat_SeqAIJ*)xx->A->data;
2119     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2120     y    = (Mat_SeqAIJ*)yy->A->data;
2121     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2122     x    = (Mat_SeqAIJ*)xx->B->data;
2123     y    = (Mat_SeqAIJ*)yy->B->data;
2124     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2125     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2126     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2127   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2128     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2129   } else {
2130     Mat      B;
2131     PetscInt *nnz_d,*nnz_o;
2132     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2133     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2134     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2135     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2136     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2137     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2138     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2139     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2140     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2141     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2142     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2143     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2144     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2145     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2151 
2152 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2153 {
2154 #if defined(PETSC_USE_COMPLEX)
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2157 
2158   PetscFunctionBegin;
2159   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2160   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2161 #else
2162   PetscFunctionBegin;
2163 #endif
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2174   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2179 {
2180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2185   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
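/*
   Row-wise maximum absolute value: computed separately for the diagonal and off-diagonal blocks
   and merged entry by entry, translating off-diagonal column indices through garray.
*/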
2189 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2190 {
2191   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2192   PetscErrorCode ierr;
2193   PetscInt       i,*idxb = 0;
2194   PetscScalar    *va,*vb;
2195   Vec            vtmp;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2199   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2200   if (idx) {
2201     for (i=0; i<A->rmap->n; i++) {
2202       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2203     }
2204   }
2205 
2206   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2207   if (idx) {
2208     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2209   }
2210   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2211   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2212 
2213   for (i=0; i<A->rmap->n; i++) {
2214     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2215       va[i] = vb[i];
2216       if (idx) idx[i] = a->garray[idxb[i]];
2217     }
2218   }
2219 
2220   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2221   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2222   ierr = PetscFree(idxb);CHKERRQ(ierr);
2223   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2228 {
2229   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2230   PetscErrorCode ierr;
2231   PetscInt       i,*idxb = 0;
2232   PetscScalar    *va,*vb;
2233   Vec            vtmp;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2237   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2238   if (idx) {
2239     for (i=0; i<A->rmap->n; i++) {
2240       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2241     }
2242   }
2243 
2244   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2245   if (idx) {
2246     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2247   }
2248   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2249   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2250 
2251   for (i=0; i<A->rmap->n; i++) {
2252     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2253       va[i] = vb[i];
2254       if (idx) idx[i] = a->garray[idxb[i]];
2255     }
2256   }
2257 
2258   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2259   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2260   ierr = PetscFree(idxb);CHKERRQ(ierr);
2261   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2262   PetscFunctionReturn(0);
2263 }
2264 
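/*
   Row-wise minimum (and, below, maximum): the diagonal and off-diagonal blocks are processed
   separately and the winning entry per row is selected by comparing magnitudes, with off-diagonal
   column indices translated through garray.
*/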
2265 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2266 {
2267   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2268   PetscInt       n      = A->rmap->n;
2269   PetscInt       cstart = A->cmap->rstart;
2270   PetscInt       *cmap  = mat->garray;
2271   PetscInt       *diagIdx, *offdiagIdx;
2272   Vec            diagV, offdiagV;
2273   PetscScalar    *a, *diagA, *offdiagA;
2274   PetscInt       r;
2275   PetscErrorCode ierr;
2276 
2277   PetscFunctionBegin;
2278   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2279   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2280   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2281   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2282   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2283   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2284   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2285   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2286   for (r = 0; r < n; ++r) {
2287     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2288       a[r]   = diagA[r];
2289       idx[r] = cstart + diagIdx[r];
2290     } else {
2291       a[r]   = offdiagA[r];
2292       idx[r] = cmap[offdiagIdx[r]];
2293     }
2294   }
2295   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2296   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2298   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2299   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2300   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2305 {
2306   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2307   PetscInt       n      = A->rmap->n;
2308   PetscInt       cstart = A->cmap->rstart;
2309   PetscInt       *cmap  = mat->garray;
2310   PetscInt       *diagIdx, *offdiagIdx;
2311   Vec            diagV, offdiagV;
2312   PetscScalar    *a, *diagA, *offdiagA;
2313   PetscInt       r;
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2318   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2319   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2320   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2321   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2322   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2323   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2324   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2325   for (r = 0; r < n; ++r) {
2326     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2327       a[r]   = diagA[r];
2328       idx[r] = cstart + diagIdx[r];
2329     } else {
2330       a[r]   = offdiagA[r];
2331       idx[r] = cmap[offdiagIdx[r]];
2332     }
2333   }
2334   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2335   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2337   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2338   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2339   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
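/* Gathers the nonzero structure of the entire matrix onto each process as a sequential matrix; values are not copied */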
2343 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2344 {
2345   PetscErrorCode ierr;
2346   Mat            *dummy;
2347 
2348   PetscFunctionBegin;
2349   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2350   *newmat = *dummy;
2351   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2356 {
2357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2358   PetscErrorCode ierr;
2359 
2360   PetscFunctionBegin;
2361   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2362   A->factorerrortype = a->A->factorerrortype;
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2367 {
2368   PetscErrorCode ierr;
2369   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2370 
2371   PetscFunctionBegin;
2372   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2373   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2374   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2375   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2380 {
2381   PetscFunctionBegin;
2382   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2383   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 /*@
2388    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2389 
2390    Collective on Mat
2391 
2392    Input Parameters:
2393 +    A - the matrix
2394 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2395 
2396  Level: advanced
2397 
2398 @*/
2399 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2400 {
2401   PetscErrorCode       ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2409 {
2410   PetscErrorCode       ierr;
2411   PetscBool            sc = PETSC_FALSE,flg;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2415   ierr = PetscObjectOptionsBegin((PetscObject)A);
2416     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2417     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2418     if (flg) {
2419       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2420     }
2421   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
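/* Y = Y + a*I; ensures the diagonal block has room for the diagonal entries before calling MatShift_Basic() */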
2425 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2426 {
2427   PetscErrorCode ierr;
2428   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2429   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2430 
2431   PetscFunctionBegin;
2432   if (!Y->preallocated) {
2433     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2434   } else if (!aij->nz) {
2435     PetscInt nonew = aij->nonew;
2436     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2437     aij->nonew = nonew;
2438   }
2439   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
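/* Checks the local diagonal block for a missing diagonal entry; the reported location is shifted to the global row numbering */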
2443 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2444 {
2445   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2446   PetscErrorCode ierr;
2447 
2448   PetscFunctionBegin;
2449   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2450   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2451   if (d) {
2452     PetscInt rstart;
2453     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2454     *d += rstart;
2455 
2456   }
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 
2461 /* -------------------------------------------------------------------*/
2462 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2463                                        MatGetRow_MPIAIJ,
2464                                        MatRestoreRow_MPIAIJ,
2465                                        MatMult_MPIAIJ,
2466                                 /* 4*/ MatMultAdd_MPIAIJ,
2467                                        MatMultTranspose_MPIAIJ,
2468                                        MatMultTransposeAdd_MPIAIJ,
2469                                        0,
2470                                        0,
2471                                        0,
2472                                 /*10*/ 0,
2473                                        0,
2474                                        0,
2475                                        MatSOR_MPIAIJ,
2476                                        MatTranspose_MPIAIJ,
2477                                 /*15*/ MatGetInfo_MPIAIJ,
2478                                        MatEqual_MPIAIJ,
2479                                        MatGetDiagonal_MPIAIJ,
2480                                        MatDiagonalScale_MPIAIJ,
2481                                        MatNorm_MPIAIJ,
2482                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2483                                        MatAssemblyEnd_MPIAIJ,
2484                                        MatSetOption_MPIAIJ,
2485                                        MatZeroEntries_MPIAIJ,
2486                                 /*24*/ MatZeroRows_MPIAIJ,
2487                                        0,
2488                                        0,
2489                                        0,
2490                                        0,
2491                                 /*29*/ MatSetUp_MPIAIJ,
2492                                        0,
2493                                        0,
2494                                        MatGetDiagonalBlock_MPIAIJ,
2495                                        0,
2496                                 /*34*/ MatDuplicate_MPIAIJ,
2497                                        0,
2498                                        0,
2499                                        0,
2500                                        0,
2501                                 /*39*/ MatAXPY_MPIAIJ,
2502                                        MatGetSubMatrices_MPIAIJ,
2503                                        MatIncreaseOverlap_MPIAIJ,
2504                                        MatGetValues_MPIAIJ,
2505                                        MatCopy_MPIAIJ,
2506                                 /*44*/ MatGetRowMax_MPIAIJ,
2507                                        MatScale_MPIAIJ,
2508                                        MatShift_MPIAIJ,
2509                                        MatDiagonalSet_MPIAIJ,
2510                                        MatZeroRowsColumns_MPIAIJ,
2511                                 /*49*/ MatSetRandom_MPIAIJ,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2517                                        0,
2518                                        MatSetUnfactored_MPIAIJ,
2519                                        MatPermute_MPIAIJ,
2520                                        0,
2521                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2522                                        MatDestroy_MPIAIJ,
2523                                        MatView_MPIAIJ,
2524                                        0,
2525                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2526                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2527                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2528                                        0,
2529                                        0,
2530                                        0,
2531                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2532                                        MatGetRowMinAbs_MPIAIJ,
2533                                        0,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                 /*75*/ MatFDColoringApply_AIJ,
2538                                        MatSetFromOptions_MPIAIJ,
2539                                        0,
2540                                        0,
2541                                        MatFindZeroDiagonals_MPIAIJ,
2542                                 /*80*/ 0,
2543                                        0,
2544                                        0,
2545                                 /*83*/ MatLoad_MPIAIJ,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2552                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2553                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2554                                        MatPtAP_MPIAIJ_MPIAIJ,
2555                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2556                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                        0,
2561                                 /*99*/ 0,
2562                                        0,
2563                                        0,
2564                                        MatConjugate_MPIAIJ,
2565                                        0,
2566                                 /*104*/MatSetValuesRow_MPIAIJ,
2567                                        MatRealPart_MPIAIJ,
2568                                        MatImaginaryPart_MPIAIJ,
2569                                        0,
2570                                        0,
2571                                 /*109*/0,
2572                                        0,
2573                                        MatGetRowMin_MPIAIJ,
2574                                        0,
2575                                        MatMissingDiagonal_MPIAIJ,
2576                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2577                                        0,
2578                                        MatGetGhosts_MPIAIJ,
2579                                        0,
2580                                        0,
2581                                 /*119*/0,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                        MatGetMultiProcBlock_MPIAIJ,
2586                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2587                                        MatGetColumnNorms_MPIAIJ,
2588                                        MatInvertBlockDiagonal_MPIAIJ,
2589                                        0,
2590                                        MatGetSubMatricesMPI_MPIAIJ,
2591                                 /*129*/0,
2592                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2593                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2594                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2595                                        0,
2596                                 /*134*/0,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*139*/MatSetBlockSizes_MPIAIJ,
2602                                        0,
2603                                        0,
2604                                        MatFDColoringSetUp_MPIXAIJ,
2605                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2606                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2607 };
2608 
2609 /* ----------------------------------------------------------------------------------------*/
2610 
2611 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2612 {
2613   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2614   PetscErrorCode ierr;
2615 
2616   PetscFunctionBegin;
2617   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2618   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2623 {
2624   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2625   PetscErrorCode ierr;
2626 
2627   PetscFunctionBegin;
2628   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2629   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2630   PetscFunctionReturn(0);
2631 }
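
/*
   A minimal usage sketch (illustrative only, kept out of the build with #if 0) of the public
   MatStoreValues()/MatRetrieveValues() pair that the two routines above implement for MPIAIJ.
   The helper name ExampleStoreRetrieve is hypothetical.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* freeze the nonzero pattern (as recommended for MatStoreValues()), then save the current values */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);
  /* ... modify the entries of A (same nonzero pattern), e.g. impose boundary conditions, and solve ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);   /* restore the saved values */
  PetscFunctionReturn(0);
}
#endif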
2632 
2633 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2634 {
2635   Mat_MPIAIJ     *b;
2636   PetscErrorCode ierr;
2637 
2638   PetscFunctionBegin;
2639   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2640   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2641   b = (Mat_MPIAIJ*)B->data;
2642 
2643 #if defined(PETSC_USE_CTABLE)
2644   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2645 #else
2646   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2647 #endif
2648   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2649   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2650   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2651 
2652   /* Because B may have been resized we simply destroy it and create a new one each time */
2653   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2654   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2655   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2656   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2657   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2658   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2659 
2660   if (!B->preallocated) {
2661     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2662     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2663     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2664     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2665     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2666   }
2667 
2668   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2669   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2670   B->preallocated  = PETSC_TRUE;
2671   B->was_assembled = PETSC_FALSE;
2672   B->assembled     = PETSC_FALSE;
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2677 {
2678   Mat            mat;
2679   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2680   PetscErrorCode ierr;
2681 
2682   PetscFunctionBegin;
2683   *newmat = 0;
2684   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2685   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2686   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2687   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2688   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2689   a       = (Mat_MPIAIJ*)mat->data;
2690 
2691   mat->factortype   = matin->factortype;
2692   mat->assembled    = PETSC_TRUE;
2693   mat->insertmode   = NOT_SET_VALUES;
2694   mat->preallocated = PETSC_TRUE;
2695 
2696   a->size         = oldmat->size;
2697   a->rank         = oldmat->rank;
2698   a->donotstash   = oldmat->donotstash;
2699   a->roworiented  = oldmat->roworiented;
2700   a->rowindices   = 0;
2701   a->rowvalues    = 0;
2702   a->getrowactive = PETSC_FALSE;
2703 
2704   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2705   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2706 
2707   if (oldmat->colmap) {
2708 #if defined(PETSC_USE_CTABLE)
2709     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2710 #else
2711     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2712     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2713     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2714 #endif
2715   } else a->colmap = 0;
2716   if (oldmat->garray) {
2717     PetscInt len;
2718     len  = oldmat->B->cmap->n;
2719     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2720     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2721     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2722   } else a->garray = 0;
2723 
2724   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2725   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2726   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2727   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2728   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2729   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2730   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2731   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2732   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2733   *newmat = mat;
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 
2738 
2739 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2740 {
2741   PetscScalar    *vals,*svals;
2742   MPI_Comm       comm;
2743   PetscErrorCode ierr;
2744   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2745   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2746   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2747   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2748   PetscInt       cend,cstart,n,*rowners;
2749   int            fd;
2750   PetscInt       bs = newMat->rmap->bs;
2751 
2752   PetscFunctionBegin;
2753   /* force binary viewer to load .info file if it has not yet done so */
2754   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2755   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2756   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2757   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2758   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2759   if (!rank) {
2760     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2761     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2762     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2763   }
2764 
2765   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2766   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2767   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2768   if (bs < 0) bs = 1;
2769 
2770   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2771   M    = header[1]; N = header[2];
2772 
2773   /* If global sizes are set, check if they are consistent with that given in the file */
2774   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2775   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2776 
2777   /* determine ownership of all (block) rows */
2778   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2779   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2780   else m = newMat->rmap->n; /* Set by user */
2781 
2782   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2783   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2784 
2785   /* First process needs enough room for process with most rows */
2786   if (!rank) {
2787     mmax = rowners[1];
2788     for (i=2; i<=size; i++) {
2789       mmax = PetscMax(mmax, rowners[i]);
2790     }
2791   } else mmax = -1;             /* unused, but compilers complain */
2792 
2793   rowners[0] = 0;
2794   for (i=2; i<=size; i++) {
2795     rowners[i] += rowners[i-1];
2796   }
2797   rstart = rowners[rank];
2798   rend   = rowners[rank+1];
2799 
2800   /* distribute row lengths to all processors */
2801   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2802   if (!rank) {
2803     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2804     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2805     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2806     for (j=0; j<m; j++) {
2807       procsnz[0] += ourlens[j];
2808     }
2809     for (i=1; i<size; i++) {
2810       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2811       /* calculate the number of nonzeros on each processor */
2812       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2813         procsnz[i] += rowlengths[j];
2814       }
2815       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2816     }
2817     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2818   } else {
2819     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2820   }
2821 
2822   if (!rank) {
2823     /* determine max buffer needed and allocate it */
2824     maxnz = 0;
2825     for (i=0; i<size; i++) {
2826       maxnz = PetscMax(maxnz,procsnz[i]);
2827     }
2828     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2829 
2830     /* read in my part of the matrix column indices  */
2831     nz   = procsnz[0];
2832     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2833     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2834 
2835     /* read in everyone else's parts and ship them off */
2836     for (i=1; i<size; i++) {
2837       nz   = procsnz[i];
2838       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2839       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2840     }
2841     ierr = PetscFree(cols);CHKERRQ(ierr);
2842   } else {
2843     /* determine buffer space needed for message */
2844     nz = 0;
2845     for (i=0; i<m; i++) {
2846       nz += ourlens[i];
2847     }
2848     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2849 
2850     /* receive message of column indices*/
2851     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2852   }
2853 
2854   /* determine column ownership if matrix is not square */
2855   if (N != M) {
2856     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2857     else n = newMat->cmap->n;
2858     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2859     cstart = cend - n;
2860   } else {
2861     cstart = rstart;
2862     cend   = rend;
2863     n      = cend - cstart;
2864   }
2865 
2866   /* loop over local rows, determining number of off diagonal entries */
2867   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2868   jj   = 0;
2869   for (i=0; i<m; i++) {
2870     for (j=0; j<ourlens[i]; j++) {
2871       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2872       jj++;
2873     }
2874   }
2875 
2876   for (i=0; i<m; i++) {
2877     ourlens[i] -= offlens[i];
2878   }
2879   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2880 
2881   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2882 
2883   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2884 
2885   for (i=0; i<m; i++) {
2886     ourlens[i] += offlens[i];
2887   }
2888 
2889   if (!rank) {
2890     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2891 
2892     /* read in my part of the matrix numerical values  */
2893     nz   = procsnz[0];
2894     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2895 
2896     /* insert into matrix */
2897     jj      = rstart;
2898     smycols = mycols;
2899     svals   = vals;
2900     for (i=0; i<m; i++) {
2901       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2902       smycols += ourlens[i];
2903       svals   += ourlens[i];
2904       jj++;
2905     }
2906 
2907     /* read in other processors and ship out */
2908     for (i=1; i<size; i++) {
2909       nz   = procsnz[i];
2910       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2911       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2912     }
2913     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2914   } else {
2915     /* receive numeric values */
2916     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2917 
2918     /* receive message of values*/
2919     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2920 
2921     /* insert into matrix */
2922     jj      = rstart;
2923     smycols = mycols;
2924     svals   = vals;
2925     for (i=0; i<m; i++) {
2926       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2927       smycols += ourlens[i];
2928       svals   += ourlens[i];
2929       jj++;
2930     }
2931   }
2932   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2933   ierr = PetscFree(vals);CHKERRQ(ierr);
2934   ierr = PetscFree(mycols);CHKERRQ(ierr);
2935   ierr = PetscFree(rowners);CHKERRQ(ierr);
2936   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2937   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2938   PetscFunctionReturn(0);
2939 }
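
/*
   A minimal calling sketch (illustrative only, not compiled) for driving the loader above
   through the public MatLoad() interface; the file name "matrix.dat" and the helper name
   ExampleLoadMPIAIJ are placeholders.
*/
#if 0
static PetscErrorCode ExampleLoadMPIAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);        /* dispatches to MatLoad_MPIAIJ() */
  /* optionally set local/global sizes or -matload_block_size before loading */
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif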
2940 
2941 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
2942 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2943 {
2944   PetscErrorCode ierr;
2945   IS             iscol_local;
2946   PetscInt       csize;
2947 
2948   PetscFunctionBegin;
2949   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2950   if (call == MAT_REUSE_MATRIX) {
2951     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2952     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2953   } else {
2954     /* check if we are grabbing all columns*/
2955     PetscBool    isstride;
2956     PetscMPIInt  lisstride = 0,gisstride;
2957     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2958     if (isstride) {
2959       PetscInt  start,len,mstart,mlen;
2960       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2961       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2962       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2963       if (mstart == start && mlen-mstart == len) lisstride = 1;
2964     }
2965     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2966     if (gisstride) {
2967       PetscInt N;
2968       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2969       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2970       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2971       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2972     } else {
2973       PetscInt cbs;
2974       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2975       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2976       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2977     }
2978   }
2979   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2980   if (call == MAT_INITIAL_MATRIX) {
2981     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2982     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2983   }
2984   PetscFunctionReturn(0);
2985 }
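
/*
   A minimal usage sketch (illustrative only, not compiled): extracting the locally owned
   row/column block with MatGetSubMatrix(), first with MAT_INITIAL_MATRIX and then reusing
   the result with MAT_REUSE_MATRIX after the values of A change. Names are hypothetical.
*/
#if 0
static PetscErrorCode ExampleGetSubMatrix(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             isrow,iscol;
  Mat            S;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  /* ... change the values (not the nonzero pattern) of A, then refresh S in place ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif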
2986 
2987 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
2988 /*
2989     Not great since it makes two copies of the submatrix: first a SeqAIJ
2990   locally, and then the end result by concatenating the local matrices.
2991   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2992 
2993   Note: This requires a sequential iscol with all indices.
2994 */
2995 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2996 {
2997   PetscErrorCode ierr;
2998   PetscMPIInt    rank,size;
2999   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3000   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3001   PetscBool      allcolumns, colflag;
3002   Mat            M,Mreuse;
3003   MatScalar      *vwork,*aa;
3004   MPI_Comm       comm;
3005   Mat_SeqAIJ     *aij;
3006 
3007   PetscFunctionBegin;
3008   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3009   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3010   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3011 
3012   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3013   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3014   if (colflag && ncol == mat->cmap->N) {
3015     allcolumns = PETSC_TRUE;
3016     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3017   } else {
3018     allcolumns = PETSC_FALSE;
3019   }
3020   if (call ==  MAT_REUSE_MATRIX) {
3021     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3022     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3023     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3024   } else {
3025     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3026   }
3027 
3028   /*
3029       m - number of local rows
3030       n - number of columns (same on all processors)
3031       rstart - first row in new global matrix generated
3032   */
3033   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3034   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3035   if (call == MAT_INITIAL_MATRIX) {
3036     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3037     ii  = aij->i;
3038     jj  = aij->j;
3039 
3040     /*
3041         Determine the number of non-zeros in the diagonal and off-diagonal
3042         portions of the matrix in order to do correct preallocation
3043     */
3044 
3045     /* first get start and end of "diagonal" columns */
3046     if (csize == PETSC_DECIDE) {
3047       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3048       if (mglobal == n) { /* square matrix */
3049         nlocal = m;
3050       } else {
3051         nlocal = n/size + ((n % size) > rank);
3052       }
3053     } else {
3054       nlocal = csize;
3055     }
3056     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3057     rstart = rend - nlocal;
3058     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3059 
3060     /* next, compute all the lengths */
3061     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3062     olens = dlens + m;
3063     for (i=0; i<m; i++) {
3064       jend = ii[i+1] - ii[i];
3065       olen = 0;
3066       dlen = 0;
3067       for (j=0; j<jend; j++) {
3068         if (*jj < rstart || *jj >= rend) olen++;
3069         else dlen++;
3070         jj++;
3071       }
3072       olens[i] = olen;
3073       dlens[i] = dlen;
3074     }
3075     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3076     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3077     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3078     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3079     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3080     ierr = PetscFree(dlens);CHKERRQ(ierr);
3081   } else {
3082     PetscInt ml,nl;
3083 
3084     M    = *newmat;
3085     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3086     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3087     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3088     /*
3089          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3090        rather than the slower MatSetValues().
3091     */
3092     M->was_assembled = PETSC_TRUE;
3093     M->assembled     = PETSC_FALSE;
3094   }
3095   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3096   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3097   ii   = aij->i;
3098   jj   = aij->j;
3099   aa   = aij->a;
3100   for (i=0; i<m; i++) {
3101     row   = rstart + i;
3102     nz    = ii[i+1] - ii[i];
3103     cwork = jj;     jj += nz;
3104     vwork = aa;     aa += nz;
3105     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3106   }
3107 
3108   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   *newmat = M;
3111 
3112   /* save submatrix used in processor for next request */
3113   if (call ==  MAT_INITIAL_MATRIX) {
3114     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3115     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3116   }
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3121 {
3122   PetscInt       m,cstart, cend,j,nnz,i,d;
3123   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3124   const PetscInt *JJ;
3125   PetscScalar    *values;
3126   PetscErrorCode ierr;
3127 
3128   PetscFunctionBegin;
3129   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3130 
3131   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3132   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3133   m      = B->rmap->n;
3134   cstart = B->cmap->rstart;
3135   cend   = B->cmap->rend;
3136   rstart = B->rmap->rstart;
3137 
3138   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3139 
3140 #if defined(PETSC_USE_DEBUG)
3141   for (i=0; i<m; i++) {
3142     nnz = Ii[i+1]- Ii[i];
3143     JJ  = J + Ii[i];
3144     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3145     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3146     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3147   }
3148 #endif
3149 
3150   for (i=0; i<m; i++) {
3151     nnz     = Ii[i+1]- Ii[i];
3152     JJ      = J + Ii[i];
3153     nnz_max = PetscMax(nnz_max,nnz);
3154     d       = 0;
3155     for (j=0; j<nnz; j++) {
3156       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3157     }
3158     d_nnz[i] = d;
3159     o_nnz[i] = nnz - d;
3160   }
3161   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3162   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3163 
3164   if (v) values = (PetscScalar*)v;
3165   else {
3166     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3167   }
3168 
3169   for (i=0; i<m; i++) {
3170     ii   = i + rstart;
3171     nnz  = Ii[i+1]- Ii[i];
3172     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3173   }
3174   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3175   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3176 
3177   if (!v) {
3178     ierr = PetscFree(values);CHKERRQ(ierr);
3179   }
3180   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 /*@
3185    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3186    (the default parallel PETSc format).
3187 
3188    Collective on MPI_Comm
3189 
3190    Input Parameters:
3191 +  B - the matrix
3192 .  i - the indices into j for the start of each local row (starts with zero)
3193 .  j - the column indices for each local row (starts with zero)
3194 -  v - optional values in the matrix
3195 
3196    Level: developer
3197 
3198    Notes:
3199        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3200      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3201      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3202 
3203        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3204 
3205        The format used for the sparse matrix input is equivalent to a
3206     row-major ordering, i.e. for the following matrix, the input data expected is
3207     as shown
3208 
3209 $        1 0 0
3210 $        2 0 3     P0
3211 $       -------
3212 $        4 5 6     P1
3213 $
3214 $     Process0 [P0]: rows_owned=[0,1]
3215 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3216 $        j =  {0,0,2}  [size = 3]
3217 $        v =  {1,2,3}  [size = 3]
3218 $
3219 $     Process1 [P1]: rows_owned=[2]
3220 $        i =  {0,3}    [size = nrow+1  = 1+1]
3221 $        j =  {0,1,2}  [size = 3]
3222 $        v =  {4,5,6}  [size = 3]
3223 
3224 .keywords: matrix, aij, compressed row, sparse, parallel
3225 
3226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3227           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3228 @*/
3229 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3230 {
3231   PetscErrorCode ierr;
3232 
3233   PetscFunctionBegin;
3234   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3235   PetscFunctionReturn(0);
3236 }
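
/*
   A minimal two-process sketch (illustrative only, not compiled) assembling the 3x3 example
   from the man page above with MatMPIAIJSetPreallocationCSR(); process 0 owns rows 0-1 and
   process 1 owns row 2. The helper name ExamplePreallocationCSR is hypothetical.
*/
#if 0
static PetscErrorCode ExamplePreallocationCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2},i1[] = {0,3},j1[] = {0,1,2};
  const PetscScalar v0[] = {1,2,3},v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR arrays are copied by the call below; they may be freed immediately afterwards */
  ierr = MatMPIAIJSetPreallocationCSR(*A,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif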
3237 
3238 /*@C
3239    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3240    (the default parallel PETSc format).  For good matrix assembly performance
3241    the user should preallocate the matrix storage by setting the parameters
3242    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3243    performance can be increased by more than a factor of 50.
3244 
3245    Collective on MPI_Comm
3246 
3247    Input Parameters:
3248 +  B - the matrix
3249 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3250            (same value is used for all local rows)
3251 .  d_nnz - array containing the number of nonzeros in the various rows of the
3252            DIAGONAL portion of the local submatrix (possibly different for each row)
3253            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3254            The size of this array is equal to the number of local rows, i.e 'm'.
3255            For matrices that will be factored, you must leave room for (and set)
3256            the diagonal entry even if it is zero.
3257 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3258            submatrix (same value is used for all local rows).
3259 -  o_nnz - array containing the number of nonzeros in the various rows of the
3260            OFF-DIAGONAL portion of the local submatrix (possibly different for
3261            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3262            structure. The size of this array is equal to the number
3263            of local rows, i.e 'm'.
3264 
3265    If the *_nnz parameter is given then the *_nz parameter is ignored
3266 
3267    The AIJ format (also called the Yale sparse matrix format or
3268    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3269    storage.  The stored row and column indices begin with zero.
3270    See Users-Manual: ch_mat for details.
3271 
3272    The parallel matrix is partitioned such that the first m0 rows belong to
3273    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3274    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3275 
3276    The DIAGONAL portion of the local submatrix of a processor can be defined
3277    as the submatrix which is obtained by extracting the part corresponding to
3278    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3279    first row that belongs to the processor, r2 is the last row belonging to
3280    this processor, and c1-c2 is the range of indices of the local part of a
3281    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3282    common case of a square matrix, the row and column ranges are the same and
3283    the DIAGONAL part is also square. The remaining portion of the local
3284    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3285 
3286    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3287 
3288    You can call MatGetInfo() to get information on how effective the preallocation was;
3289    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3290    You can also run with the option -info and look for messages with the string
3291    malloc in them to see if additional memory allocation was needed.
3292 
3293    Example usage:
3294 
3295    Consider the following 8x8 matrix with 34 non-zero values, that is
3296    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3297    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3298    as follows:
3299 
3300 .vb
3301             1  2  0  |  0  3  0  |  0  4
3302     Proc0   0  5  6  |  7  0  0  |  8  0
3303             9  0 10  | 11  0  0  | 12  0
3304     -------------------------------------
3305            13  0 14  | 15 16 17  |  0  0
3306     Proc1   0 18  0  | 19 20 21  |  0  0
3307             0  0  0  | 22 23  0  | 24  0
3308     -------------------------------------
3309     Proc2  25 26 27  |  0  0 28  | 29  0
3310            30  0  0  | 31 32 33  |  0 34
3311 .ve
3312 
3313    This can be represented as a collection of submatrices as:
3314 
3315 .vb
3316       A B C
3317       D E F
3318       G H I
3319 .ve
3320 
3321    Where the submatrices A,B,C are owned by proc0, D,E,F are
3322    owned by proc1, G,H,I are owned by proc2.
3323 
3324    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3325    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3326    The 'M','N' parameters are 8,8, and have the same values on all procs.
3327 
3328    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3329    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3330    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3331    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3332    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3333    matrix, and [DF] as another SeqAIJ matrix.
3334 
3335    When d_nz, o_nz parameters are specified, d_nz storage elements are
3336    allocated for every row of the local diagonal submatrix, and o_nz
3337    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3338    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3339    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3340    In this case, the values of d_nz,o_nz are:
3341 .vb
3342      proc0 : dnz = 2, o_nz = 2
3343      proc1 : dnz = 3, o_nz = 2
3344      proc2 : dnz = 1, o_nz = 4
3345 .ve
3346    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3347    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3348    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3349    34 values.
3350 
3351    When d_nnz, o_nnz parameters are specified, the storage is specified
3352    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3353    In the above case the values for d_nnz,o_nnz are:
3354 .vb
3355      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3356      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3357      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3358 .ve
3359    Here the space allocated is the sum of all the above values, i.e. 34, and
3360    hence pre-allocation is perfect.
3361 
3362    Level: intermediate
3363 
3364 .keywords: matrix, aij, compressed row, sparse, parallel
3365 
3366 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3367           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3368 @*/
3369 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3370 {
3371   PetscErrorCode ierr;
3372 
3373   PetscFunctionBegin;
3374   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3375   PetscValidType(B,1);
3376   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
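
/*
   A minimal three-process sketch (illustrative only, not compiled) of the d_nnz/o_nnz
   preallocation for the 8x8 example in the man page above; the helper name and the explicit
   rank branches are for illustration.
*/
#if 0
static PetscErrorCode ExamplePreallocation(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  const PetscInt d0[] = {2,2,2},o0[] = {2,2,2};   /* proc0 */
  const PetscInt d1[] = {3,3,2},o1[] = {2,1,1};   /* proc1 */
  const PetscInt d2[] = {1,1},  o2[] = {4,4};     /* proc2 */
  const PetscInt *dnnz,*onnz;
  PetscInt       m;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (rank == 0)      {m = 3; dnnz = d0; onnz = o0;}
  else if (rank == 1) {m = 3; dnnz = d1; onnz = o1;}
  else                {m = 2; dnnz = d2; onnz = o2;}
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,m,8,8);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,dnnz,0,onnz);CHKERRQ(ierr);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  PetscFunctionReturn(0);
}
#endif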
3379 
3380 /*@
3381      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3382          CSR format the local rows.
3383 
3384    Collective on MPI_Comm
3385 
3386    Input Parameters:
3387 +  comm - MPI communicator
3388 .  m - number of local rows (Cannot be PETSC_DECIDE)
3389 .  n - This value should be the same as the local size used in creating the
3390        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3391        calculated if N is given). For square matrices n is almost always m.
3392 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3393 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3394 .   i - row indices
3395 .   j - column indices
3396 -   a - matrix values
3397 
3398    Output Parameter:
3399 .   mat - the matrix
3400 
3401    Level: intermediate
3402 
3403    Notes:
3404        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3405      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3406      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3407 
3408        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3409 
3410        The format used for the sparse matrix input is equivalent to a
3411     row-major ordering, i.e. for the following matrix, the input data expected is
3412     as shown
3413 
3414 $        1 0 0
3415 $        2 0 3     P0
3416 $       -------
3417 $        4 5 6     P1
3418 $
3419 $     Process0 [P0]: rows_owned=[0,1]
3420 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3421 $        j =  {0,0,2}  [size = 3]
3422 $        v =  {1,2,3}  [size = 3]
3423 $
3424 $     Process1 [P1]: rows_owned=[2]
3425 $        i =  {0,3}    [size = nrow+1  = 1+1]
3426 $        j =  {0,1,2}  [size = 3]
3427 $        v =  {4,5,6}  [size = 3]
3428 
3429 .keywords: matrix, aij, compressed row, sparse, parallel
3430 
3431 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3432           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3433 @*/
3434 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3435 {
3436   PetscErrorCode ierr;
3437 
3438   PetscFunctionBegin;
3439   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3440   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3441   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3442   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3443   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3444   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3445   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3446   PetscFunctionReturn(0);
3447 }
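
/*
   The same two-process CSR data used in the man page above can be passed in a single call
   (illustrative sketch only, not compiled); compare with calling MatCreate(), MatSetSizes(),
   MatSetType() and MatMPIAIJSetPreallocationCSR() separately.
*/
#if 0
static PetscErrorCode ExampleCreateWithArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2},i1[] = {0,3},j1[] = {0,1,2};
  const PetscScalar v0[] = {1,2,3},v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* process 0 owns two rows, process 1 owns one row of the 3x3 example matrix */
  ierr = MatCreateMPIAIJWithArrays(comm,rank ? 1 : 2,PETSC_DECIDE,3,3,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif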
3448 
3449 /*@C
3450    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3451    (the default parallel PETSc format).  For good matrix assembly performance
3452    the user should preallocate the matrix storage by setting the parameters
3453    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3454    performance can be increased by more than a factor of 50.
3455 
3456    Collective on MPI_Comm
3457 
3458    Input Parameters:
3459 +  comm - MPI communicator
3460 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3461            This value should be the same as the local size used in creating the
3462            y vector for the matrix-vector product y = Ax.
3463 .  n - This value should be the same as the local size used in creating the
3464        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3465        calculated if N is given). For square matrices n is almost always m.
3466 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3467 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3468 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3469            (same value is used for all local rows)
3470 .  d_nnz - array containing the number of nonzeros in the various rows of the
3471            DIAGONAL portion of the local submatrix (possibly different for each row)
3472            or NULL, if d_nz is used to specify the nonzero structure.
3473            The size of this array is equal to the number of local rows, i.e 'm'.
3474 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3475            submatrix (same value is used for all local rows).
3476 -  o_nnz - array containing the number of nonzeros in the various rows of the
3477            OFF-DIAGONAL portion of the local submatrix (possibly different for
3478            each row) or NULL, if o_nz is used to specify the nonzero
3479            structure. The size of this array is equal to the number
3480            of local rows, i.e 'm'.
3481 
3482    Output Parameter:
3483 .  A - the matrix
3484 
3485    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3486    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3487    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3488 
3489    Notes:
3490    If the *_nnz parameter is given then the *_nz parameter is ignored
3491 
3492    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3493    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3494    storage requirements for this matrix.
3495 
3496    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3497    processor then it must be used on all processors that share the object for
3498    that argument.
3499 
3500    The user MUST specify either the local or global matrix dimensions
3501    (possibly both).
3502 
3503    The parallel matrix is partitioned across processors such that the
3504    first m0 rows belong to process 0, the next m1 rows belong to
3505    process 1, the next m2 rows belong to process 2 etc.. where
3506    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3507    values corresponding to an [m x N] submatrix.
3508 
3509    The columns are logically partitioned with the n0 columns belonging
3510    to 0th partition, the next n1 columns belonging to the next
3511    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3512 
3513    The DIAGONAL portion of the local submatrix on any given processor
3514    is the submatrix corresponding to the rows and columns m,n
3515    corresponding to the given processor. i.e diagonal matrix on
3516    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3517    etc. The remaining portion of the local submatrix [m x (N-n)]
3518    constitute the OFF-DIAGONAL portion. The example below better
3519    illustrates this concept.
3520 
3521    For a square global matrix we define each processor's diagonal portion
3522    to be its local rows and the corresponding columns (a square submatrix);
3523    each processor's off-diagonal portion encompasses the remainder of the
3524    local matrix (a rectangular submatrix).
3525 
3526    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3527 
3528    When calling this routine with a single process communicator, a matrix of
3529    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3530    type of communicator, use the construction mechanism:
3531      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3532 
3533    By default, this format uses inodes (identical nodes) when possible.
3534    We search for consecutive rows with the same nonzero structure, thereby
3535    reusing matrix information to achieve increased efficiency.
3536 
3537    Options Database Keys:
3538 +  -mat_no_inode  - Do not use inodes
3539 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3540 -  -mat_aij_oneindex - Internally use indexing starting at 1
3541         rather than 0.  Note that when calling MatSetValues(),
3542         the user still MUST index entries starting at 0!
3543 
3544 
3545    Example usage:
3546 
3547    Consider the following 8x8 matrix with 34 non-zero values, that is
3548    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3549    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3550    as follows:
3551 
3552 .vb
3553             1  2  0  |  0  3  0  |  0  4
3554     Proc0   0  5  6  |  7  0  0  |  8  0
3555             9  0 10  | 11  0  0  | 12  0
3556     -------------------------------------
3557            13  0 14  | 15 16 17  |  0  0
3558     Proc1   0 18  0  | 19 20 21  |  0  0
3559             0  0  0  | 22 23  0  | 24  0
3560     -------------------------------------
3561     Proc2  25 26 27  |  0  0 28  | 29  0
3562            30  0  0  | 31 32 33  |  0 34
3563 .ve
3564 
3565    This can be represented as a collection of submatrices as:
3566 
3567 .vb
3568       A B C
3569       D E F
3570       G H I
3571 .ve
3572 
3573    Where the submatrices A,B,C are owned by proc0, D,E,F are
3574    owned by proc1, G,H,I are owned by proc2.
3575 
3576    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3577    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3578    The 'M','N' parameters are 8,8, and have the same values on all procs.
3579 
3580    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3581    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3582    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3583    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3584    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3585    matrix, and [DF] as another SeqAIJ matrix.
3586 
3587    When d_nz, o_nz parameters are specified, d_nz storage elements are
3588    allocated for every row of the local diagonal submatrix, and o_nz
3589    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3590    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3591    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3592    In this case, the values of d_nz,o_nz are:
3593 .vb
3594      proc0 : dnz = 2, o_nz = 2
3595      proc1 : dnz = 3, o_nz = 2
3596      proc2 : dnz = 1, o_nz = 4
3597 .ve
3598    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3599    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3600    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3601    34 values.
3602 
3603    When d_nnz, o_nnz parameters are specified, the storage is specified
3604    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3605    In the above case the values for d_nnz,o_nnz are:
3606 .vb
3607      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3608      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3609      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3610 .ve
3611    Here the space allocated is the sum of all the above values, i.e. 34, and
3612    hence pre-allocation is perfect.
3613 
3614    Level: intermediate
3615 
3616 .keywords: matrix, aij, compressed row, sparse, parallel
3617 
3618 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3619           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3620 @*/
3621 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3622 {
3623   PetscErrorCode ierr;
3624   PetscMPIInt    size;
3625 
3626   PetscFunctionBegin;
3627   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3628   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3630   if (size > 1) {
3631     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3632     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3633   } else {
3634     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3635     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3636   }
3637   PetscFunctionReturn(0);
3638 }
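
/*
   A minimal sketch (illustrative only, not compiled) contrasting a direct MatCreateAIJ() call
   with the construction mechanism recommended in the notes above; mlocal and the per-row
   estimates 5 and 2 are placeholders.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* one-shot creation: at most 5 nonzeros per row expected in the diagonal block, 2 in the off-diagonal block */
  ierr = MatCreateAIJ(comm,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,A);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);

  /* equivalent construction mechanism, which always yields a MATMPIAIJ matrix */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif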
3639 
3640 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3641 {
3642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3643   PetscBool      flg;
3644   PetscErrorCode ierr;
3645 
3646   PetscFunctionBegin;
3647   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3648   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3649   if (Ad)     *Ad     = a->A;
3650   if (Ao)     *Ao     = a->B;
3651   if (colmap) *colmap = a->garray;
3652   PetscFunctionReturn(0);
3653 }
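
/*
   A minimal usage sketch (illustrative only, not compiled) for MatMPIAIJGetSeqAIJ() above:
   accessing the local diagonal and off-diagonal SeqAIJ blocks and the garray column map.
   The helper name ExampleInspectBlocks is hypothetical.
*/
#if 0
static PetscErrorCode ExampleInspectBlocks(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       nd,no;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ad,NULL,&nd);CHKERRQ(ierr);   /* columns of the local diagonal block */
  ierr = MatGetSize(Ao,NULL,&no);CHKERRQ(ierr);   /* columns of the local off-diagonal block */
  /* colmap[k] is the global column index of local column k of Ao */
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block columns %D, off-diagonal block columns %D\n",nd,no);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif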
3654 
3655 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3656 {
3657   PetscErrorCode ierr;
3658   PetscInt       m,N,i,rstart,nnz,Ii;
3659   PetscInt       *indx;
3660   PetscScalar    *values;
3661 
3662   PetscFunctionBegin;
3663   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3664   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3665     PetscInt       *dnz,*onz,sum,bs,cbs;
3666 
3667     if (n == PETSC_DECIDE) {
3668       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3669     }
3670     /* Check sum(n) = N */
3671     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3672     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3673 
3674     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3675     rstart -= m;
3676 
3677     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3678     for (i=0; i<m; i++) {
3679       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3680       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3681       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3682     }
3683 
3684     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3685     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3686     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3687     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3688     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3689     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3690     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3691   }
3692 
3693   /* numeric phase */
3694   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3695   for (i=0; i<m; i++) {
3696     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3697     Ii   = i + rstart;
3698     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3699     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3700   }
3701   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703   PetscFunctionReturn(0);
3704 }
3705 
3706 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3707 {
3708   PetscErrorCode    ierr;
3709   PetscMPIInt       rank;
3710   PetscInt          m,N,i,rstart,nnz;
3711   size_t            len;
3712   const PetscInt    *indx;
3713   PetscViewer       out;
3714   char              *name;
3715   Mat               B;
3716   const PetscScalar *values;
3717 
3718   PetscFunctionBegin;
3719   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3720   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3721   /* Should this be the type of the diagonal block of A? */
3722   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3723   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3724   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3725   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3726   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3727   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3728   for (i=0; i<m; i++) {
3729     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3730     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3731     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3732   }
3733   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3735 
3736   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3737   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3738   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3739   sprintf(name,"%s.%d",outfile,rank);
3740   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3741   ierr = PetscFree(name);CHKERRQ(ierr);
3742   ierr = MatView(B,out);CHKERRQ(ierr);
3743   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3744   ierr = MatDestroy(&B);CHKERRQ(ierr);
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3749 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3750 {
3751   PetscErrorCode      ierr;
3752   Mat_Merge_SeqsToMPI *merge;
3753   PetscContainer      container;
3754 
3755   PetscFunctionBegin;
3756   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3757   if (container) {
3758     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3759     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3760     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3761     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3762     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3763     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3764     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3765     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3766     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3767     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3768     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3769     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3770     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3771     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3772     ierr = PetscFree(merge);CHKERRQ(ierr);
3773     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3774   }
3775   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 #include <../src/mat/utils/freespace.h>
3780 #include <petscbt.h>
3781 
3782 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3783 {
3784   PetscErrorCode      ierr;
3785   MPI_Comm            comm;
3786   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3787   PetscMPIInt         size,rank,taga,*len_s;
3788   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3789   PetscInt            proc,m;
3790   PetscInt            **buf_ri,**buf_rj;
3791   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3792   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3793   MPI_Request         *s_waits,*r_waits;
3794   MPI_Status          *status;
3795   MatScalar           *aa=a->a;
3796   MatScalar           **abuf_r,*ba_i;
3797   Mat_Merge_SeqsToMPI *merge;
3798   PetscContainer      container;
3799 
3800   PetscFunctionBegin;
3801   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3802   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3803 
3804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3805   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3806 
3807   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3808   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3809 
3810   bi     = merge->bi;
3811   bj     = merge->bj;
3812   buf_ri = merge->buf_ri;
3813   buf_rj = merge->buf_rj;
3814 
3815   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3816   owners = merge->rowmap->range;
3817   len_s  = merge->len_s;
3818 
3819   /* send and recv matrix values */
3820   /*-----------------------------*/
3821   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3822   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3823 
3824   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3825   for (proc=0,k=0; proc<size; proc++) {
3826     if (!len_s[proc]) continue;
3827     i    = owners[proc];
3828     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3829     k++;
3830   }
3831 
3832   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3833   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3834   ierr = PetscFree(status);CHKERRQ(ierr);
3835 
3836   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3837   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3838 
3839   /* insert mat values of mpimat */
3840   /*----------------------------*/
3841   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3842   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3843 
3844   for (k=0; k<merge->nrecv; k++) {
3845     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3846     nrows       = *(buf_ri_k[k]);
3847     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3848     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
3849   }
3850 
3851   /* set values of ba */
3852   m = merge->rowmap->n;
3853   for (i=0; i<m; i++) {
3854     arow = owners[rank] + i;
3855     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3856     bnzi = bi[i+1] - bi[i];
3857     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3858 
3859     /* add local non-zero vals of this proc's seqmat into ba */
3860     anzi   = ai[arow+1] - ai[arow];
3861     aj     = a->j + ai[arow];
3862     aa     = a->a + ai[arow];
3863     nextaj = 0;
3864     for (j=0; nextaj<anzi; j++) {
3865       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3866         ba_i[j] += aa[nextaj++];
3867       }
3868     }
3869 
3870     /* add received vals into ba */
3871     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
3872       /* i-th row */
3873       if (i == *nextrow[k]) {
3874         anzi   = *(nextai[k]+1) - *nextai[k];
3875         aj     = buf_rj[k] + *(nextai[k]);
3876         aa     = abuf_r[k] + *(nextai[k]);
3877         nextaj = 0;
3878         for (j=0; nextaj<anzi; j++) {
3879           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3880             ba_i[j] += aa[nextaj++];
3881           }
3882         }
3883         nextrow[k]++; nextai[k]++;
3884       }
3885     }
3886     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3887   }
3888   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3889   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3890 
3891   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
3892   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3893   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3894   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
3895   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
3900 
3901 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3902 {
3903   PetscErrorCode      ierr;
3904   Mat                 B_mpi;
3905   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
3906   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3907   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
3908   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
3909   PetscInt            len,proc,*dnz,*onz,bs,cbs;
3910   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3911   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3912   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
3913   MPI_Status          *status;
3914   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
3915   PetscBT             lnkbt;
3916   Mat_Merge_SeqsToMPI *merge;
3917   PetscContainer      container;
3918 
3919   PetscFunctionBegin;
3920   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3921 
3922   /* make sure it is a PETSc comm */
3923   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
3924   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3925   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3926 
3927   ierr = PetscNew(&merge);CHKERRQ(ierr);
3928   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
3929 
3930   /* determine row ownership */
3931   /*---------------------------------------------------------*/
3932   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3933   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3934   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
3935   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
3936   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
3937   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
3938   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
3939 
3940   m      = merge->rowmap->n;
3941   owners = merge->rowmap->range;
3942 
3943   /* determine the number of messages to send, their lengths */
3944   /*---------------------------------------------------------*/
3945   len_s = merge->len_s;
3946 
3947   len          = 0; /* length of buf_si[] */
3948   merge->nsend = 0;
3949   for (proc=0; proc<size; proc++) {
3950     len_si[proc] = 0;
3951     if (proc == rank) {
3952       len_s[proc] = 0;
3953     } else {
3954       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3955       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3956     }
3957     if (len_s[proc]) {
3958       merge->nsend++;
3959       nrows = 0;
3960       for (i=owners[proc]; i<owners[proc+1]; i++) {
3961         if (ai[i+1] > ai[i]) nrows++;
3962       }
3963       len_si[proc] = 2*(nrows+1);
3964       len         += len_si[proc];
3965     }
3966   }
3967 
3968   /* determine the number and length of messages to receive for ij-structure */
3969   /*-------------------------------------------------------------------------*/
3970   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3971   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3972 
3973   /* post the Irecv of j-structure */
3974   /*-------------------------------*/
3975   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3976   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3977 
3978   /* post the Isend of j-structure */
3979   /*--------------------------------*/
3980   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
3981 
3982   for (proc=0, k=0; proc<size; proc++) {
3983     if (!len_s[proc]) continue;
3984     i    = owners[proc];
3985     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3986     k++;
3987   }
3988 
3989   /* receives and sends of j-structure are complete */
3990   /*------------------------------------------------*/
3991   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3992   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3993 
3994   /* send and recv i-structure */
3995   /*---------------------------*/
3996   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3997   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3998 
3999   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4000   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4001   for (proc=0,k=0; proc<size; proc++) {
4002     if (!len_s[proc]) continue;
4003     /* form outgoing message for i-structure:
4004          buf_si[0]:                 nrows to be sent
4005                [1:nrows]:           row index (local to the destination process)
4006                [nrows+1:2*nrows+1]: i-structure index
4007     */
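    /* Example (hypothetical numbers, for illustration only): sending two nonempty rows with
       destination-local indices 3 and 7 holding 4 and 2 nonzeros gives len_si[proc] = 2*(2+1) = 6 and
           buf_si = [2, 3, 7, 0, 4, 6]
       i.e. nrows = 2, the row indices {3,7}, then the running offsets {0,4,6} into the j/a send buffers */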
4008     /*-------------------------------------------*/
4009     nrows       = len_si[proc]/2 - 1;
4010     buf_si_i    = buf_si + nrows+1;
4011     buf_si[0]   = nrows;
4012     buf_si_i[0] = 0;
4013     nrows       = 0;
4014     for (i=owners[proc]; i<owners[proc+1]; i++) {
4015       anzi = ai[i+1] - ai[i];
4016       if (anzi) {
4017         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4018         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4019         nrows++;
4020       }
4021     }
4022     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4023     k++;
4024     buf_si += len_si[proc];
4025   }
4026 
4027   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4028   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4029 
4030   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4031   for (i=0; i<merge->nrecv; i++) {
4032     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4033   }
4034 
4035   ierr = PetscFree(len_si);CHKERRQ(ierr);
4036   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4037   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4038   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4039   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4040   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4041   ierr = PetscFree(status);CHKERRQ(ierr);
4042 
4043   /* compute a local seq matrix in each processor */
4044   /*----------------------------------------------*/
4045   /* allocate bi array and free space for accumulating nonzero column info */
4046   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4047   bi[0] = 0;
4048 
4049   /* create and initialize a linked list */
4050   nlnk = N+1;
4051   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4052 
4053   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4054   len  = ai[owners[rank+1]] - ai[owners[rank]];
4055   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4056 
4057   current_space = free_space;
4058 
4059   /* determine symbolic info for each local row */
4060   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4061 
4062   for (k=0; k<merge->nrecv; k++) {
4063     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4064     nrows       = *buf_ri_k[k];
4065     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4066     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4067   }
4068 
4069   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4070   len  = 0;
4071   for (i=0; i<m; i++) {
4072     bnzi = 0;
4073     /* add local non-zero cols of this proc's seqmat into lnk */
4074     arow  = owners[rank] + i;
4075     anzi  = ai[arow+1] - ai[arow];
4076     aj    = a->j + ai[arow];
4077     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4078     bnzi += nlnk;
4079     /* add received col data into lnk */
4080     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4081       if (i == *nextrow[k]) { /* i-th row */
4082         anzi  = *(nextai[k]+1) - *nextai[k];
4083         aj    = buf_rj[k] + *nextai[k];
4084         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4085         bnzi += nlnk;
4086         nextrow[k]++; nextai[k]++;
4087       }
4088     }
4089     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4090 
4091     /* if free space is not available, make more free space */
4092     if (current_space->local_remaining<bnzi) {
4093       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4094       nspacedouble++;
4095     }
4096     /* copy data into free space, then initialize lnk */
4097     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4098     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4099 
4100     current_space->array           += bnzi;
4101     current_space->local_used      += bnzi;
4102     current_space->local_remaining -= bnzi;
4103 
4104     bi[i+1] = bi[i] + bnzi;
4105   }
4106 
4107   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4108 
4109   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4110   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4111   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4112 
4113   /* create symbolic parallel matrix B_mpi */
4114   /*---------------------------------------*/
4115   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4116   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4117   if (n==PETSC_DECIDE) {
4118     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4119   } else {
4120     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4121   }
4122   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4123   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4124   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4125   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4126   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4127 
4128   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4129   B_mpi->assembled    = PETSC_FALSE;
4130   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4131   merge->bi           = bi;
4132   merge->bj           = bj;
4133   merge->buf_ri       = buf_ri;
4134   merge->buf_rj       = buf_rj;
4135   merge->coi          = NULL;
4136   merge->coj          = NULL;
4137   merge->owners_co    = NULL;
4138 
4139   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4140 
4141   /* attach the supporting struct to B_mpi for reuse */
4142   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4143   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4144   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4145   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4146   *mpimat = B_mpi;
4147 
4148   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4149   PetscFunctionReturn(0);
4150 }
4151 
4152 /*@C
4153       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4154                  matrices from each processor
4155 
4156     Collective on MPI_Comm
4157 
4158    Input Parameters:
4159 +    comm - the communicator the parallel matrix will live on
4160 .    seqmat - the input sequential matrix on each process
4161 .    m - number of local rows (or PETSC_DECIDE)
4162 .    n - number of local columns (or PETSC_DECIDE)
4163 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4164 
4165    Output Parameter:
4166 .    mpimat - the parallel matrix generated
4167 
4168     Level: advanced
4169 
4170    Notes:
4171      The dimensions of the sequential matrix in each processor MUST be the same.
4172      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
4173      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4174 @*/
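/*
   Illustrative usage sketch (not part of the library source; "seqmat" and "mpimat" are hypothetical names):

      Mat seqmat,mpimat;
      ... every process assembles its own MATSEQAIJ matrix seqmat, all with the same dimensions ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... after the numerical values of seqmat change (same nonzero pattern) ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/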
4175 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4176 {
4177   PetscErrorCode ierr;
4178   PetscMPIInt    size;
4179 
4180   PetscFunctionBegin;
4181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4182   if (size == 1) {
4183     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4184     if (scall == MAT_INITIAL_MATRIX) {
4185       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4186     } else {
4187       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4188     }
4189     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4190     PetscFunctionReturn(0);
4191   }
4192   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4193   if (scall == MAT_INITIAL_MATRIX) {
4194     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4195   }
4196   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4197   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4198   PetscFunctionReturn(0);
4199 }
4200 
4201 /*@
4202      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4203           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4204           with MatGetSize()
4205 
4206     Not Collective
4207 
4208    Input Parameters:
4209 +    A - the matrix
4210 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4211 
4212    Output Parameter:
4213 .    A_loc - the local sequential matrix generated
4214 
4215     Level: developer
4216 
4217 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4218 
4219 @*/
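/*
   Illustrative usage sketch (hedged; "A" is an assembled MATMPIAIJ matrix, "A_loc" a hypothetical name):

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use the local rows of A as the sequential matrix A_loc ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   refresh the values after A changes
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/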
4220 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4221 {
4222   PetscErrorCode ierr;
4223   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4224   Mat_SeqAIJ     *mat,*a,*b;
4225   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4226   MatScalar      *aa,*ba,*cam;
4227   PetscScalar    *ca;
4228   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4229   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4230   PetscBool      match;
4231   MPI_Comm       comm;
4232   PetscMPIInt    size;
4233 
4234   PetscFunctionBegin;
4235   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4236   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4237   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4239   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4240 
4241   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4242   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4243   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4244   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4245   aa = a->a; ba = b->a;
4246   if (scall == MAT_INITIAL_MATRIX) {
4247     if (size == 1) {
4248       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4249       PetscFunctionReturn(0);
4250     }
4251 
4252     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4253     ci[0] = 0;
4254     for (i=0; i<am; i++) {
4255       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4256     }
4257     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4258     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4259     k    = 0;
4260     for (i=0; i<am; i++) {
4261       ncols_o = bi[i+1] - bi[i];
4262       ncols_d = ai[i+1] - ai[i];
4263       /* off-diagonal portion of A */
4264       for (jo=0; jo<ncols_o; jo++) {
4265         col = cmap[*bj];
4266         if (col >= cstart) break;
4267         cj[k]   = col; bj++;
4268         ca[k++] = *ba++;
4269       }
4270       /* diagonal portion of A */
4271       for (j=0; j<ncols_d; j++) {
4272         cj[k]   = cstart + *aj++;
4273         ca[k++] = *aa++;
4274       }
4275       /* off-diagonal portion of A */
4276       for (j=jo; j<ncols_o; j++) {
4277         cj[k]   = cmap[*bj++];
4278         ca[k++] = *ba++;
4279       }
4280     }
4281     /* put together the new matrix */
4282     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4283     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4284     /* Since these are PETSc arrays, change flags to free them as necessary. */
4285     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4286     mat->free_a  = PETSC_TRUE;
4287     mat->free_ij = PETSC_TRUE;
4288     mat->nonew   = 0;
4289   } else if (scall == MAT_REUSE_MATRIX) {
4290     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4291     ci = mat->i; cj = mat->j; cam = mat->a;
4292     for (i=0; i<am; i++) {
4293       /* off-diagonal portion of A */
4294       ncols_o = bi[i+1] - bi[i];
4295       for (jo=0; jo<ncols_o; jo++) {
4296         col = cmap[*bj];
4297         if (col >= cstart) break;
4298         *cam++ = *ba++; bj++;
4299       }
4300       /* diagonal portion of A */
4301       ncols_d = ai[i+1] - ai[i];
4302       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4303       /* off-diagonal portion of A */
4304       for (j=jo; j<ncols_o; j++) {
4305         *cam++ = *ba++; bj++;
4306       }
4307     }
4308   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4309   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 /*@C
4314      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4315 
4316     Not Collective
4317 
4318    Input Parameters:
4319 +    A - the matrix
4320 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4321 -    row, col - index sets of rows and columns to extract (or NULL)
4322 
4323    Output Parameter:
4324 .    A_loc - the local sequential matrix generated
4325 
4326     Level: developer
4327 
4328 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4329 
4330 @*/
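/*
   Illustrative usage sketch (hedged; passing NULL for row and col lets the routine pick the nonzero columns):

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ... A_loc holds the local rows of A restricted to the columns that contain nonzeros ...
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/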
4331 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4332 {
4333   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4334   PetscErrorCode ierr;
4335   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4336   IS             isrowa,iscola;
4337   Mat            *aloc;
4338   PetscBool      match;
4339 
4340   PetscFunctionBegin;
4341   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4342   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4343   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4344   if (!row) {
4345     start = A->rmap->rstart; end = A->rmap->rend;
4346     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4347   } else {
4348     isrowa = *row;
4349   }
4350   if (!col) {
4351     start = A->cmap->rstart;
4352     cmap  = a->garray;
4353     nzA   = a->A->cmap->n;
4354     nzB   = a->B->cmap->n;
4355     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4356     ncols = 0;
4357     for (i=0; i<nzB; i++) {
4358       if (cmap[i] < start) idx[ncols++] = cmap[i];
4359       else break;
4360     }
4361     imark = i;
4362     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4363     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4364     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4365   } else {
4366     iscola = *col;
4367   }
4368   if (scall != MAT_INITIAL_MATRIX) {
4369     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4370     aloc[0] = *A_loc;
4371   }
4372   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4373   *A_loc = aloc[0];
4374   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4375   if (!row) {
4376     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4377   }
4378   if (!col) {
4379     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4380   }
4381   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4382   PetscFunctionReturn(0);
4383 }
4384 
4385 /*@C
4386     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local portion of A
4387 
4388     Collective on Mat
4389 
4390    Input Parameters:
4391 +    A,B - the matrices in mpiaij format
4392 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4393 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4394 
4395    Output Parameter:
4396 +    rowb, colb - index sets of rows and columns of B to extract
4397 -    B_seq - the sequential matrix generated
4398 
4399     Level: developer
4400 
4401 @*/
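/*
   Illustrative usage sketch (hedged; variable names are hypothetical):

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... rowb and colb are returned so a later call with MAT_REUSE_MATRIX can refill B_seq in place ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/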
4402 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4403 {
4404   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4405   PetscErrorCode ierr;
4406   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4407   IS             isrowb,iscolb;
4408   Mat            *bseq=NULL;
4409 
4410   PetscFunctionBegin;
4411   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4412     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4413   }
4414   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4415 
4416   if (scall == MAT_INITIAL_MATRIX) {
4417     start = A->cmap->rstart;
4418     cmap  = a->garray;
4419     nzA   = a->A->cmap->n;
4420     nzB   = a->B->cmap->n;
4421     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4422     ncols = 0;
4423     for (i=0; i<nzB; i++) {  /* row < local row index */
4424       if (cmap[i] < start) idx[ncols++] = cmap[i];
4425       else break;
4426     }
4427     imark = i;
4428     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4429     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4430     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4431     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4432   } else {
4433     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4434     isrowb  = *rowb; iscolb = *colb;
4435     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4436     bseq[0] = *B_seq;
4437   }
4438   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4439   *B_seq = bseq[0];
4440   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4441   if (!rowb) {
4442     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4443   } else {
4444     *rowb = isrowb;
4445   }
4446   if (!colb) {
4447     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4448   } else {
4449     *colb = iscolb;
4450   }
4451   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4452   PetscFunctionReturn(0);
4453 }
4454 
4455 /*
4456     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4457     of the OFF-DIAGONAL portion of the local part of A
4458 
4459     Collective on Mat
4460 
4461    Input Parameters:
4462 +    A,B - the matrices in mpiaij format
4463 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4464 
4465    Output Parameter:
4466 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4467 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4468 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4469 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4470 
4471     Level: developer
4472 
4473 */
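/*
   Illustrative call pattern for this internal routine (hedged; variable names are hypothetical):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... startsj_s, startsj_r, and bufa are saved so MAT_REUSE_MATRIX can refill B_oth without resending the structure ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/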
4474 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4475 {
4476   VecScatter_MPI_General *gen_to,*gen_from;
4477   PetscErrorCode         ierr;
4478   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4479   Mat_SeqAIJ             *b_oth;
4480   VecScatter             ctx =a->Mvctx;
4481   MPI_Comm               comm;
4482   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4483   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4484   PetscScalar            *rvalues,*svalues;
4485   MatScalar              *b_otha,*bufa,*bufA;
4486   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4487   MPI_Request            *rwaits = NULL,*swaits = NULL;
4488   MPI_Status             *sstatus,rstatus;
4489   PetscMPIInt            jj,size;
4490   PetscInt               *cols,sbs,rbs;
4491   PetscScalar            *vals;
4492 
4493   PetscFunctionBegin;
4494   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4495   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4496 
4497   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4498     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4499   }
4500   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4502 
4503   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4504   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4505   rvalues  = gen_from->values; /* holds the length of receiving row */
4506   svalues  = gen_to->values;   /* holds the length of sending row */
4507   nrecvs   = gen_from->n;
4508   nsends   = gen_to->n;
4509 
4510   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4511   srow    = gen_to->indices;    /* local row index to be sent */
4512   sstarts = gen_to->starts;
4513   sprocs  = gen_to->procs;
4514   sstatus = gen_to->sstatus;
4515   sbs     = gen_to->bs;
4516   rstarts = gen_from->starts;
4517   rprocs  = gen_from->procs;
4518   rbs     = gen_from->bs;
4519 
4520   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4521   if (scall == MAT_INITIAL_MATRIX) {
4522     /* i-array */
4523     /*---------*/
4524     /*  post receives */
4525     for (i=0; i<nrecvs; i++) {
4526       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4527       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4528       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4529     }
4530 
4531     /* pack the outgoing message */
4532     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4533 
4534     sstartsj[0] = 0;
4535     rstartsj[0] = 0;
4536     len         = 0; /* total length of j or a array to be sent */
4537     k           = 0;
4538     for (i=0; i<nsends; i++) {
4539       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4540       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4541       for (j=0; j<nrows; j++) {
4542         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4543         for (l=0; l<sbs; l++) {
4544           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4545 
4546           rowlen[j*sbs+l] = ncols;
4547 
4548           len += ncols;
4549           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4550         }
4551         k++;
4552       }
4553       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4554 
4555       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4556     }
4557     /* recvs and sends of i-array are completed */
4558     i = nrecvs;
4559     while (i--) {
4560       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4561     }
4562     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4563 
4564     /* allocate buffers for sending j and a arrays */
4565     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4566     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4567 
4568     /* create i-array of B_oth */
4569     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4570 
4571     b_othi[0] = 0;
4572     len       = 0; /* total length of j or a array to be received */
4573     k         = 0;
4574     for (i=0; i<nrecvs; i++) {
4575       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4576       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4577       for (j=0; j<nrows; j++) {
4578         b_othi[k+1] = b_othi[k] + rowlen[j];
4579         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4580         k++;
4581       }
4582       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4583     }
4584 
4585     /* allocate space for j and a arrays of B_oth */
4586     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4587     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4588 
4589     /* j-array */
4590     /*---------*/
4591     /*  post receives of j-array */
4592     for (i=0; i<nrecvs; i++) {
4593       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4594       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4595     }
4596 
4597     /* pack the outgoing message j-array */
4598     k = 0;
4599     for (i=0; i<nsends; i++) {
4600       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4601       bufJ  = bufj+sstartsj[i];
4602       for (j=0; j<nrows; j++) {
4603         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4604         for (ll=0; ll<sbs; ll++) {
4605           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4606           for (l=0; l<ncols; l++) {
4607             *bufJ++ = cols[l];
4608           }
4609           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4610         }
4611       }
4612       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4613     }
4614 
4615     /* recvs and sends of j-array are completed */
4616     i = nrecvs;
4617     while (i--) {
4618       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4619     }
4620     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4621   } else if (scall == MAT_REUSE_MATRIX) {
4622     sstartsj = *startsj_s;
4623     rstartsj = *startsj_r;
4624     bufa     = *bufa_ptr;
4625     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4626     b_otha   = b_oth->a;
4627   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse, must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4628 
4629   /* a-array */
4630   /*---------*/
4631   /*  post receives of a-array */
4632   for (i=0; i<nrecvs; i++) {
4633     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4634     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4635   }
4636 
4637   /* pack the outgoing message a-array */
4638   k = 0;
4639   for (i=0; i<nsends; i++) {
4640     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4641     bufA  = bufa+sstartsj[i];
4642     for (j=0; j<nrows; j++) {
4643       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4644       for (ll=0; ll<sbs; ll++) {
4645         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4646         for (l=0; l<ncols; l++) {
4647           *bufA++ = vals[l];
4648         }
4649         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4650       }
4651     }
4652     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4653   }
4654   /* recvs and sends of a-array are completed */
4655   i = nrecvs;
4656   while (i--) {
4657     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4658   }
4659   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4660   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4661 
4662   if (scall == MAT_INITIAL_MATRIX) {
4663     /* put together the new matrix */
4664     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4665 
4666     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4667     /* Since these are PETSc arrays, change flags to free them as necessary. */
4668     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4669     b_oth->free_a  = PETSC_TRUE;
4670     b_oth->free_ij = PETSC_TRUE;
4671     b_oth->nonew   = 0;
4672 
4673     ierr = PetscFree(bufj);CHKERRQ(ierr);
4674     if (!startsj_s || !bufa_ptr) {
4675       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4676       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so free it here */
4677     } else {
4678       *startsj_s = sstartsj;
4679       *startsj_r = rstartsj;
4680       *bufa_ptr  = bufa;
4681     }
4682   }
4683   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4684   PetscFunctionReturn(0);
4685 }
4686 
4687 /*@C
4688   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4689 
4690   Not Collective
4691 
4692   Input Parameters:
4693 . A - The matrix in mpiaij format
4694 
4695   Output Parameter:
4696 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4697 . colmap - A map from global column index to local index into lvec
4698 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4699 
4700   Level: developer
4701 
4702 @*/
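/*
   Illustrative usage sketch (hedged; all three output pointers must be valid, per the checks below):

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
      ... the returned objects are internal members owned by A; do not destroy them ...
*/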
4703 #if defined(PETSC_USE_CTABLE)
4704 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4705 #else
4706 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4707 #endif
4708 {
4709   Mat_MPIAIJ *a;
4710 
4711   PetscFunctionBegin;
4712   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4713   PetscValidPointer(lvec, 2);
4714   PetscValidPointer(colmap, 3);
4715   PetscValidPointer(multScatter, 4);
4716   a = (Mat_MPIAIJ*) A->data;
4717   if (lvec) *lvec = a->lvec;
4718   if (colmap) *colmap = a->colmap;
4719   if (multScatter) *multScatter = a->Mvctx;
4720   PetscFunctionReturn(0);
4721 }
4722 
4723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4724 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4725 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4726 #if defined(PETSC_HAVE_ELEMENTAL)
4727 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4728 #endif
4729 #if defined(PETSC_HAVE_HYPRE)
4730 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4731 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
4732 #endif
4733 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
4734 
4735 /*
4736     Computes (B'*A')' = A*B since computing A*B directly is untenable
4737 
4738                n                       p                          p
4739         (              )       (              )         (                  )
4740       m (      A       )  *  n (       B      )   =   m (         C        )
4741         (              )       (              )         (                  )
4742 
4743 */
4744 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4745 {
4746   PetscErrorCode ierr;
4747   Mat            At,Bt,Ct;
4748 
4749   PetscFunctionBegin;
4750   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4751   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4752   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4753   ierr = MatDestroy(&At);CHKERRQ(ierr);
4754   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4755   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4756   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4757   PetscFunctionReturn(0);
4758 }
4759 
4760 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4761 {
4762   PetscErrorCode ierr;
4763   PetscInt       m=A->rmap->n,n=B->cmap->n;
4764   Mat            Cmat;
4765 
4766   PetscFunctionBegin;
4767   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4768   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4769   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4770   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4771   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4772   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4773   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4774   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4775 
4776   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4777 
4778   *C = Cmat;
4779   PetscFunctionReturn(0);
4780 }
4781 
4782 /* ----------------------------------------------------------------*/
4783 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4784 {
4785   PetscErrorCode ierr;
4786 
4787   PetscFunctionBegin;
4788   if (scall == MAT_INITIAL_MATRIX) {
4789     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4790     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4791     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4792   }
4793   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4794   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4795   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4796   PetscFunctionReturn(0);
4797 }
4798 
4799 /*MC
4800    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4801 
4802    Options Database Keys:
4803 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4804 
4805   Level: beginner
4806 
4807 .seealso: MatCreateAIJ()
4808 M*/
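/*
   Illustrative creation sketch (hedged; the standard MatCreate()/MatSetType() sequence, names hypothetical):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/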
4809 
4810 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4811 {
4812   Mat_MPIAIJ     *b;
4813   PetscErrorCode ierr;
4814   PetscMPIInt    size;
4815 
4816   PetscFunctionBegin;
4817   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4818 
4819   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4820   B->data       = (void*)b;
4821   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4822   B->assembled  = PETSC_FALSE;
4823   B->insertmode = NOT_SET_VALUES;
4824   b->size       = size;
4825 
4826   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4827 
4828   /* build cache for off array entries formed */
4829   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4830 
4831   b->donotstash  = PETSC_FALSE;
4832   b->colmap      = 0;
4833   b->garray      = 0;
4834   b->roworiented = PETSC_TRUE;
4835 
4836   /* stuff used for matrix vector multiply */
4837   b->lvec  = NULL;
4838   b->Mvctx = NULL;
4839 
4840   /* stuff for MatGetRow() */
4841   b->rowindices   = 0;
4842   b->rowvalues    = 0;
4843   b->getrowactive = PETSC_FALSE;
4844 
4845   /* flexible pointer used in CUSP/CUSPARSE classes */
4846   b->spptr = NULL;
4847 
4848   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
4849   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4850   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4851   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4852   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4853   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4854   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4855   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
4856   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
4857   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
4858 #if defined(PETSC_HAVE_ELEMENTAL)
4859   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
4860 #endif
4861 #if defined(PETSC_HAVE_HYPRE)
4862   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
4863 #endif
4864   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
4865   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4867   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4868 #if defined(PETSC_HAVE_HYPRE)
4869   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
4870 #endif
4871   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 /*@C
4876      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4877          and "off-diagonal" part of the matrix in CSR format.
4878 
4879    Collective on MPI_Comm
4880 
4881    Input Parameters:
4882 +  comm - MPI communicator
4883 .  m - number of local rows (Cannot be PETSC_DECIDE)
4884 .  n - This value should be the same as the local size used in creating the
4885        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4886        calculated if N is given). For square matrices n is almost always m.
4887 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4888 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4889 .   i - row indices for "diagonal" portion of matrix
4890 .   j - column indices
4891 .   a - matrix values
4892 .   oi - row indices for "off-diagonal" portion of matrix
4893 .   oj - column indices
4894 -   oa - matrix values
4895 
4896    Output Parameter:
4897 .   mat - the matrix
4898 
4899    Level: advanced
4900 
4901    Notes:
4902        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
4903        must free the arrays once the matrix has been destroyed and not before.
4904 
4905        The i and j indices are 0 based
4906 
4907        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4908 
4909        This sets local rows and cannot be used to set off-processor values.
4910 
4911        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
4912        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
4913        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
4914        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
4915        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
4916        communication if it is known that only local entries will be set.
4917 
4918 .keywords: matrix, aij, compressed row, sparse, parallel
4919 
4920 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4921           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
4922 @*/
4923 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4924 {
4925   PetscErrorCode ierr;
4926   Mat_MPIAIJ     *maij;
4927 
4928   PetscFunctionBegin;
4929   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4930   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4931   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4932   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4933   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4934   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4935   maij = (Mat_MPIAIJ*) (*mat)->data;
4936 
4937   (*mat)->preallocated = PETSC_TRUE;
4938 
4939   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
4940   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
4941 
4942   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4943   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4944 
4945   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4946   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4947   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4948   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4949 
4950   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4951   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4952   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4953   PetscFunctionReturn(0);
4954 }
4955 
4956 /*
4957     Special version for direct calls from Fortran
4958 */
4959 #include <petsc/private/fortranimpl.h>
4960 
4961 /* Change these macros so can be used in void function */
4962 #undef CHKERRQ
4963 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
4964 #undef SETERRQ2
4965 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
4966 #undef SETERRQ3
4967 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
4968 #undef SETERRQ
4969 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
4970 
4971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4972 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4974 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4975 #else
4976 #endif
4977 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4978 {
4979   Mat            mat  = *mmat;
4980   PetscInt       m    = *mm, n = *mn;
4981   InsertMode     addv = *maddv;
4982   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4983   PetscScalar    value;
4984   PetscErrorCode ierr;
4985 
4986   MatCheckPreallocated(mat,1);
4987   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
4988 
4989 #if defined(PETSC_USE_DEBUG)
4990   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4991 #endif
4992   {
4993     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
4994     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
4995     PetscBool roworiented = aij->roworiented;
4996 
4997     /* Some Variables required in the macro */
4998     Mat        A                 = aij->A;
4999     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5000     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5001     MatScalar  *aa               = a->a;
5002     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5003     Mat        B                 = aij->B;
5004     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5005     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5006     MatScalar  *ba               = b->a;
5007 
5008     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5009     PetscInt  nonew = a->nonew;
5010     MatScalar *ap1,*ap2;
5011 
5012     PetscFunctionBegin;
5013     for (i=0; i<m; i++) {
5014       if (im[i] < 0) continue;
5015 #if defined(PETSC_USE_DEBUG)
5016       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5017 #endif
5018       if (im[i] >= rstart && im[i] < rend) {
5019         row      = im[i] - rstart;
5020         lastcol1 = -1;
5021         rp1      = aj + ai[row];
5022         ap1      = aa + ai[row];
5023         rmax1    = aimax[row];
5024         nrow1    = ailen[row];
5025         low1     = 0;
5026         high1    = nrow1;
5027         lastcol2 = -1;
5028         rp2      = bj + bi[row];
5029         ap2      = ba + bi[row];
5030         rmax2    = bimax[row];
5031         nrow2    = bilen[row];
5032         low2     = 0;
5033         high2    = nrow2;
5034 
5035         for (j=0; j<n; j++) {
5036           if (roworiented) value = v[i*n+j];
5037           else value = v[i+j*m];
5038           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5039           if (in[j] >= cstart && in[j] < cend) {
5040             col = in[j] - cstart;
5041             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5042           } else if (in[j] < 0) continue;
5043 #if defined(PETSC_USE_DEBUG)
5044           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5045 #endif
5046           else {
5047             if (mat->was_assembled) {
5048               if (!aij->colmap) {
5049                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5050               }
5051 #if defined(PETSC_USE_CTABLE)
5052               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5053               col--;
5054 #else
5055               col = aij->colmap[in[j]] - 1;
5056 #endif
5057               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5058                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5059                 col  =  in[j];
5060                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5061                 B     = aij->B;
5062                 b     = (Mat_SeqAIJ*)B->data;
5063                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5064                 rp2   = bj + bi[row];
5065                 ap2   = ba + bi[row];
5066                 rmax2 = bimax[row];
5067                 nrow2 = bilen[row];
5068                 low2  = 0;
5069                 high2 = nrow2;
5070                 bm    = aij->B->rmap->n;
5071                 ba    = b->a;
5072               }
5073             } else col = in[j];
5074             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5075           }
5076         }
5077       } else if (!aij->donotstash) {
5078         if (roworiented) {
5079           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5080         } else {
5081           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5082         }
5083       }
5084     }
5085   }
5086   PetscFunctionReturnVoid();
5087 }
5088 
5089