xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6186d1c35b56f725f4b9cc80452a1fafcee863dd)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity; a usage sketch follows this manual page.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also
21    automatically switches over to using inodes when enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
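
/*
   A minimal caller-side sketch (not part of this file) of the pattern the manual page above
   recommends: create the matrix as MATAIJ and call both preallocation routines so the same code
   works on one or many MPI processes; the preallocation call that does not apply to the actual
   type is simply ignored.  The communicator comm, global sizes M and N, the error code ierr, and
   the nonzero estimates are assumptions of the sketch.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/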
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
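
/*
   Caller-side sketch (assumed user code) of the public entry point served by the routine above:
   MatFindNonzeroRows() returns an IS of the locally owned rows that contain at least one nonzero
   value; when every row has a nonzero the IS is returned as NULL, as the early return above shows.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/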
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
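
/*
   Caller-side sketch (assumed user code) for the routine above: MatGetColumnNorms() fills a
   caller-provided array whose length is the global number of columns, and every process ends up
   with the full result because of the Allreduce above.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/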
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* count the off-diagonal entries in each row (olens) and those left of the diagonal block (ld) */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* count the off-diagonal entries in each row (olens) and those left of the diagonal block (ld) */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
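
/*
   Caller-side sketch (assumed code; comm, M, gseq and ierr are defined by the caller) of how a
   preconditioner might invoke MatDistribute_MPIAIJ().  gseq is a square MATSEQAIJ matrix holding
   the global system, and PetscSplitOwnership() is just one way to pick a local row count m whose
   sum over the communicator equals M.

     Mat      gdist;
     PetscInt m = PETSC_DECIDE;
     ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&gdist);CHKERRQ(ierr);
*/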
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399   number to the local column number in the off-diagonal part of the local
400   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
401   a slightly higher hash-table lookup cost; without it, it is not scalable (each
402   process holds an order N integer array) but access is fast.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
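
/*
   The colmap built above is consulted later (for example in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ()) to translate a global column number gcol into a local column of the
   off-diagonal block B; a sketch of the two compile-time variants, mirroring the lookups below:

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;
     #else
       col = aij->colmap[gcol] - 1;
     #endif

   In both variants col ends up as the local column number, or -1 when gcol has no entry in the
   off-diagonal block.
*/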
426 
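/*
   The two macros below insert value at local (row,col) (the global pair (orow,ocol) is used only
   for error messages) into the diagonal (A) or off-diagonal (B) SeqAIJ block: they search the
   current row for the column, add to or overwrite an existing entry, and otherwise shift the
   later entries of the row up and insert a new nonzero (reallocating through
   MatSeqXAIJReallocateAIJ() when the row is full), honoring the nonew and ignorezeroentries
   settings of the block.
*/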
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
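
/*
   Caller-side sketch (assumed user code) of the pattern served by MatSetValues_MPIAIJ() above:
   entries destined for rows owned by other processes are placed in the stash and are only
   communicated during assembly, so every sequence of MatSetValues() calls must be closed by the
   assembly pair before the matrix is used.

     ierr = MatSetValues(A,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/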
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any process has disassembled; if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
837     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
838   } else if (diag != 0.0) {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
844     }
845     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   } else {
848     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
849   }
850   ierr = PetscFree(lrows);CHKERRQ(ierr);
851 
852   /* only change matrix nonzero state if pattern was allowed to be changed */
853   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
854     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
855     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
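
/*
   Caller-side sketch (assumed user code) for the routine above: MatZeroRows() takes rows in
   global numbering, which may be owned by other processes unless MAT_NO_OFF_PROC_ZERO_ROWS is
   set, and when x and b are passed it also sets b[row] = diag*x[row] for every zeroed row.

     PetscInt zrows[2] = {0,7};
     ierr = MatZeroRows(A,2,zrows,1.0,x,b);CHKERRQ(ierr);
*/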
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
865   PetscErrorCode    ierr;
866   PetscMPIInt       n = A->rmap->n;
867   PetscInt          i,j,r,m,p = 0,len = 0;
868   PetscInt          *lrows,*owners = A->rmap->range;
869   PetscSFNode       *rrows;
870   PetscSF           sf;
871   const PetscScalar *xx;
872   PetscScalar       *bb,*mask;
873   Vec               xmask,lmask;
874   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
875   const PetscInt    *aj, *ii,*ridx;
876   PetscScalar       *aa;
877 
878   PetscFunctionBegin;
879   /* Create SF where leaves are input rows and roots are owned rows */
880   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
881   for (r = 0; r < n; ++r) lrows[r] = -1;
882   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
883   for (r = 0; r < N; ++r) {
884     const PetscInt idx   = rows[r];
885     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
886     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
887       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
888     }
889     rrows[r].rank  = p;
890     rrows[r].index = rows[r] - owners[p];
891   }
892   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
893   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
894   /* Collect flags for rows to be zeroed */
895   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
896   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
897   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
898   /* Compress and put in row numbers */
899   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
900   /* zero diagonal part of matrix */
901   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
902   /* handle off diagonal part of matrix */
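  /* Build a global vector (xmask) with 1.0 in every zeroed row, scatter it into the ghost
     (column) ordering of B as lmask, and then use that mask to zero each off-diagonal entry
     whose global column was zeroed, adjusting the right-hand side b from x when both are given. */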
903   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
904   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
905   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
906   for (i=0; i<len; i++) bb[lrows[i]] = 1;
907   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
908   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
911   if (x) {
912     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
916   }
917   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
918   /* remove zeroed rows of off diagonal matrix */
919   ii = aij->i;
920   for (i=0; i<len; i++) {
921     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
922   }
923   /* loop over all elements of off process part of matrix zeroing removed columns*/
924   if (aij->compressedrow.use) {
925     m    = aij->compressedrow.nrows;
926     ii   = aij->compressedrow.i;
927     ridx = aij->compressedrow.rindex;
928     for (i=0; i<m; i++) {
929       n  = ii[i+1] - ii[i];
930       aj = aij->j + ii[i];
931       aa = aij->a + ii[i];
932 
933       for (j=0; j<n; j++) {
934         if (PetscAbsScalar(mask[*aj])) {
935           if (b) bb[*ridx] -= *aa*xx[*aj];
936           *aa = 0.0;
937         }
938         aa++;
939         aj++;
940       }
941       ridx++;
942     }
943   } else { /* do not use compressed row format */
944     m = l->B->rmap->n;
945     for (i=0; i<m; i++) {
946       n  = ii[i+1] - ii[i];
947       aj = aij->j + ii[i];
948       aa = aij->a + ii[i];
949       for (j=0; j<n; j++) {
950         if (PetscAbsScalar(mask[*aj])) {
951           if (b) bb[i] -= *aa*xx[*aj];
952           *aa = 0.0;
953         }
954         aa++;
955         aj++;
956       }
957     }
958   }
959   if (x) {
960     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
961     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962   }
963   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
964   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
965   ierr = PetscFree(lrows);CHKERRQ(ierr);
966 
967   /* only change matrix nonzero state if pattern was allowed to be changed */
968   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
969     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
970     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMult_MPIAIJ"
977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscInt       nt;
982 
983   PetscFunctionBegin;
984   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
985   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
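  /* Overlap communication with computation: start gathering the ghost values of xx, apply the
     local diagonal block while the scatter is in flight, then complete the scatter and add the
     contribution of the off-diagonal block acting on the received ghost values. */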
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1008 {
1009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscBool      merged;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1030   /* do nondiagonal part */
1031   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1032   if (!merged) {
1033     /* send it on its way */
1034     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035     /* do local part */
1036     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1037     /* receive remote parts: note this assumes the values are not actually */
1038     /* added into yy until the VecScatterEnd() below */
1039     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1040   } else {
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* values actually were received in the Begin() but we need to call this nop */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1053 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1054 {
1055   MPI_Comm       comm;
1056   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1057   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1058   IS             Me,Notme;
1059   PetscErrorCode ierr;
1060   PetscInt       M,N,first,last,*notme,i;
1061   PetscMPIInt    size;
1062 
1063   PetscFunctionBegin;
1064   /* Easy test: check whether the diagonal blocks are transposes of each other */
1065   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1066   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1067   if (!*f) PetscFunctionReturn(0);
1068   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1070   if (size == 1) PetscFunctionReturn(0);
1071 
1072   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1073   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1074   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1075   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1076   for (i=0; i<first; i++) notme[i] = i;
1077   for (i=last; i<M; i++) notme[i-last+first] = i;
1078   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1079   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1080   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1081   Aoff = Aoffs[0];
1082   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1083   Boff = Boffs[0];
1084   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1086   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1088   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1089   ierr = PetscFree(notme);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   /* do nondiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1103   /* send it on its way */
1104   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1105   /* do local part */
1106   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   /* receive remote parts */
1108   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
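
/*
   Caller-side sketch (assumed user code) for the routine above; MatCreateVecs() supplies a
   vector whose layout matches the matrix rows, which is what MatGetDiagonal() expects here.

     Vec d;
     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/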
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatDestroy_MPIAIJ"
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1167   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1168   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1169 
1170   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* process 0 needs enough space to hold the largest local nonzero count of any process */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
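/*
   Usage sketch (illustrative, not part of this source): the binary file written above contains the
   header {MAT_FILE_CLASSID, M, N, global nz}, then all row lengths, then all global column indices,
   then all values; it can be read back with MatLoad(), e.g. (the file name is hypothetical)

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/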
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1386   }
1387 
1388   {
1389     /* assemble the entire matrix onto first processor. */
1390     Mat        A;
1391     Mat_SeqAIJ *Aloc;
1392     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1393     MatScalar  *a;
1394 
1395     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1396     if (!rank) {
1397       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1398     } else {
1399       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1400     }
1401     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1402     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1403     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1404     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1405     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1406 
1407     /* copy over the A part */
1408     Aloc = (Mat_SeqAIJ*)aij->A->data;
1409     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1410     row  = mat->rmap->rstart;
1411     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1412     for (i=0; i<m; i++) {
1413       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1414       row++;
1415       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1416     }
1417     aj = Aloc->j;
1418     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1419 
1420     /* copy over the B part */
1421     Aloc = (Mat_SeqAIJ*)aij->B->data;
1422     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1423     row  = mat->rmap->rstart;
1424     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1425     ct   = cols;
1426     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1427     for (i=0; i<m; i++) {
1428       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1429       row++;
1430       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1431     }
1432     ierr = PetscFree(ct);CHKERRQ(ierr);
1433     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1434     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     /*
1436        Everyone has to call to draw the matrix since the graphics waits are
1437        synchronized across all processors that share the PetscDraw object
1438     */
1439     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1440     if (!rank) {
1441       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1442       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1445     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1446     ierr = MatDestroy(&A);CHKERRQ(ierr);
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatView_MPIAIJ"
1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1454 {
1455   PetscErrorCode ierr;
1456   PetscBool      iascii,isdraw,issocket,isbinary;
1457 
1458   PetscFunctionBegin;
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1463   if (iascii || isdraw || isbinary || issocket) {
1464     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
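/*
   Usage sketch (illustrative, not part of this source): the ASCII branches in
   MatView_MPIAIJ_ASCIIorDraworSocket() are selected by the viewer format, e.g.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/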
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatSOR_MPIAIJ"
1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscErrorCode ierr;
1475   Vec            bb1 = 0;
1476   PetscBool      hasop;
1477 
1478   PetscFunctionBegin;
1479   if (flag == SOR_APPLY_UPPER) {
1480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481     PetscFunctionReturn(0);
1482   }
1483 
1484   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1485     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1486   }
1487 
1488   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1489     if (flag & SOR_ZERO_INITIAL_GUESS) {
1490       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491       its--;
1492     }
1493 
1494     while (its--) {
1495       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497 
1498       /* update rhs: bb1 = bb - B*x */
1499       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1500       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1501 
1502       /* local sweep */
1503       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1504     }
1505   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1506     if (flag & SOR_ZERO_INITIAL_GUESS) {
1507       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1508       its--;
1509     }
1510     while (its--) {
1511       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513 
1514       /* update rhs: bb1 = bb - B*x */
1515       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1516       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1517 
1518       /* local sweep */
1519       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1520     }
1521   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1522     if (flag & SOR_ZERO_INITIAL_GUESS) {
1523       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1524       its--;
1525     }
1526     while (its--) {
1527       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529 
1530       /* update rhs: bb1 = bb - B*x */
1531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1533 
1534       /* local sweep */
1535       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1536     }
1537   } else if (flag & SOR_EISENSTAT) {
1538     Vec xx1;
1539 
1540     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1542 
1543     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     if (!mat->diag) {
1546       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1547       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1548     }
1549     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1550     if (hasop) {
1551       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1552     } else {
1553       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1554     }
1555     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1556 
1557     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1558 
1559     /* local sweep */
1560     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1561     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1562     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1563   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1564 
1565   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1566 
1567   matin->errortype = mat->A->errortype;
1568   PetscFunctionReturn(0);
1569 }
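/*
   Note (illustrative, not part of this source): each local sweep above folds the off-process coupling
   into the right-hand side, bb1 = bb - B*lvec, and then runs sequential SOR on the diagonal block A,
   so only the SOR_LOCAL_* (and Eisenstat) variants are supported in parallel.  This routine is
   typically reached through PCSOR; assuming a PC obtained from KSPGetPC(),

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
     ierr = PCSORSetOmega(pc,1.5);CHKERRQ(ierr);
*/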
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatPermute_MPIAIJ"
1573 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1574 {
1575   Mat            aA,aB,Aperm;
1576   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1577   PetscScalar    *aa,*ba;
1578   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1579   PetscSF        rowsf,sf;
1580   IS             parcolp = NULL;
1581   PetscBool      done;
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1586   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1587   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1588   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1589 
1590   /* Invert row permutation to find out where my rows should go */
1591   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1592   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1593   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1595   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1596   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1597 
1598   /* Invert column permutation to find out where my columns should go */
1599   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1600   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1601   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1602   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1603   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1605   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1606 
1607   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1608   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1609   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1610 
1611   /* Find out where my gcols should go */
1612   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1613   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1614   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1615   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1616   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1617   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1620 
1621   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1622   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1623   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1624   for (i=0; i<m; i++) {
1625     PetscInt row = rdest[i],rowner;
1626     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1627     for (j=ai[i]; j<ai[i+1]; j++) {
1628       PetscInt cowner,col = cdest[aj[j]];
1629       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1630       if (rowner == cowner) dnnz[i]++;
1631       else onnz[i]++;
1632     }
1633     for (j=bi[i]; j<bi[i+1]; j++) {
1634       PetscInt cowner,col = gcdest[bj[j]];
1635       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1636       if (rowner == cowner) dnnz[i]++;
1637       else onnz[i]++;
1638     }
1639   }
1640   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1641   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1642   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1643   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1644   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1645 
1646   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1647   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1648   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1649   for (i=0; i<m; i++) {
1650     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1651     PetscInt j0,rowlen;
1652     rowlen = ai[i+1] - ai[i];
1653     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than m (the length of the scratch arrays), so insert in batches */
1654       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1655       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1656     }
1657     rowlen = bi[i+1] - bi[i];
1658     for (j0=j=0; j<rowlen; j0=j) {
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662   }
1663   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1666   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1667   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1668   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1669   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1670   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1671   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1672   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1673   *B = Aperm;
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1679 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1680 {
1681   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1682   PetscErrorCode ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1686   if (ghosts) *ghosts = aij->garray;
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1692 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1693 {
1694   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1695   Mat            A    = mat->A,B = mat->B;
1696   PetscErrorCode ierr;
1697   PetscReal      isend[5],irecv[5];
1698 
1699   PetscFunctionBegin;
1700   info->block_size = 1.0;
1701   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1704   isend[3] = info->memory;  isend[4] = info->mallocs;
1705 
1706   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1709   isend[3] += info->memory;  isend[4] += info->mallocs;
1710   if (flag == MAT_LOCAL) {
1711     info->nz_used      = isend[0];
1712     info->nz_allocated = isend[1];
1713     info->nz_unneeded  = isend[2];
1714     info->memory       = isend[3];
1715     info->mallocs      = isend[4];
1716   } else if (flag == MAT_GLOBAL_MAX) {
1717     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1718 
1719     info->nz_used      = irecv[0];
1720     info->nz_allocated = irecv[1];
1721     info->nz_unneeded  = irecv[2];
1722     info->memory       = irecv[3];
1723     info->mallocs      = irecv[4];
1724   } else if (flag == MAT_GLOBAL_SUM) {
1725     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1726 
1727     info->nz_used      = irecv[0];
1728     info->nz_allocated = irecv[1];
1729     info->nz_unneeded  = irecv[2];
1730     info->memory       = irecv[3];
1731     info->mallocs      = irecv[4];
1732   }
1733   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1734   info->fill_ratio_needed = 0;
1735   info->factor_mallocs    = 0;
1736   PetscFunctionReturn(0);
1737 }
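/*
   Usage sketch (illustrative, not part of this source): global storage statistics are obtained by
   reducing the per-process numbers assembled above, e.g.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/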
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatSetOption_MPIAIJ"
1741 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1742 {
1743   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1744   PetscErrorCode ierr;
1745 
1746   PetscFunctionBegin;
1747   switch (op) {
1748   case MAT_NEW_NONZERO_LOCATIONS:
1749   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1750   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1751   case MAT_KEEP_NONZERO_PATTERN:
1752   case MAT_NEW_NONZERO_LOCATION_ERR:
1753   case MAT_USE_INODES:
1754   case MAT_IGNORE_ZERO_ENTRIES:
1755     MatCheckPreallocated(A,1);
1756     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1757     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_ROW_ORIENTED:
1760     a->roworiented = flg;
1761 
1762     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1763     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_NEW_DIAGONALS:
1766     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1767     break;
1768   case MAT_IGNORE_OFF_PROC_ENTRIES:
1769     a->donotstash = flg;
1770     break;
1771   case MAT_SPD:
1772     A->spd_set = PETSC_TRUE;
1773     A->spd     = flg;
1774     if (flg) {
1775       A->symmetric                  = PETSC_TRUE;
1776       A->structurally_symmetric     = PETSC_TRUE;
1777       A->symmetric_set              = PETSC_TRUE;
1778       A->structurally_symmetric_set = PETSC_TRUE;
1779     }
1780     break;
1781   case MAT_SYMMETRIC:
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   case MAT_STRUCTURALLY_SYMMETRIC:
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   case MAT_HERMITIAN:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_SYMMETRY_ETERNAL:
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   default:
1794     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1795   }
1796   PetscFunctionReturn(0);
1797 }
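/*
   Usage sketch (illustrative, not part of this source):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);

   The first keeps each process from stashing entries destined for other ranks; the second turns
   new-nonzero mallocs during MatSetValues() into errors.  Most options above are simply forwarded
   to the sequential diagonal (a->A) and off-diagonal (a->B) blocks.
*/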
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatGetRow_MPIAIJ"
1801 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1802 {
1803   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1804   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1805   PetscErrorCode ierr;
1806   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1807   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1808   PetscInt       *cmap,*idx_p;
1809 
1810   PetscFunctionBegin;
1811   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1812   mat->getrowactive = PETSC_TRUE;
1813 
1814   if (!mat->rowvalues && (idx || v)) {
1815     /*
1816         allocate enough space to hold information from the longest row.
1817     */
1818     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1819     PetscInt   max = 1,tmp;
1820     for (i=0; i<matin->rmap->n; i++) {
1821       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1822       if (max < tmp) max = tmp;
1823     }
1824     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1825   }
1826 
1827   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1828   lrow = row - rstart;
1829 
1830   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1831   if (!v)   {pvA = 0; pvB = 0;}
1832   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1833   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1834   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1835   nztot = nzA + nzB;
1836 
1837   cmap = mat->garray;
1838   if (v  || idx) {
1839     if (nztot) {
1840       /* Sort by increasing column numbers, assuming A and B already sorted */
1841       PetscInt imark = -1;
1842       if (v) {
1843         *v = v_p = mat->rowvalues;
1844         for (i=0; i<nzB; i++) {
1845           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1846           else break;
1847         }
1848         imark = i;
1849         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1850         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1851       }
1852       if (idx) {
1853         *idx = idx_p = mat->rowindices;
1854         if (imark > -1) {
1855           for (i=0; i<imark; i++) {
1856             idx_p[i] = cmap[cworkB[i]];
1857           }
1858         } else {
1859           for (i=0; i<nzB; i++) {
1860             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1861             else break;
1862           }
1863           imark = i;
1864         }
1865         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1866         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1867       }
1868     } else {
1869       if (idx) *idx = 0;
1870       if (v)   *v   = 0;
1871     }
1872   }
1873   *nz  = nztot;
1874   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1875   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1881 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1882 {
1883   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1884 
1885   PetscFunctionBegin;
1886   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1887   aij->getrowactive = PETSC_FALSE;
1888   PetscFunctionReturn(0);
1889 }
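/*
   Usage sketch (illustrative, not part of this source): only locally owned rows may be requested,
   and every MatGetRow() must be matched by a MatRestoreRow() before the next row is fetched.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols[], vals[] ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/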
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatNorm_MPIAIJ"
1893 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1894 {
1895   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1896   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1897   PetscErrorCode ierr;
1898   PetscInt       i,j,cstart = mat->cmap->rstart;
1899   PetscReal      sum = 0.0;
1900   MatScalar      *v;
1901 
1902   PetscFunctionBegin;
1903   if (aij->size == 1) {
1904     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1905   } else {
1906     if (type == NORM_FROBENIUS) {
1907       v = amat->a;
1908       for (i=0; i<amat->nz; i++) {
1909         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1910       }
1911       v = bmat->a;
1912       for (i=0; i<bmat->nz; i++) {
1913         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1914       }
1915       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1916       *norm = PetscSqrtReal(*norm);
1917     } else if (type == NORM_1) { /* max column norm */
1918       PetscReal *tmp,*tmp2;
1919       PetscInt  *jj,*garray = aij->garray;
1920       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1921       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1922       *norm = 0.0;
1923       v     = amat->a; jj = amat->j;
1924       for (j=0; j<amat->nz; j++) {
1925         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1926       }
1927       v = bmat->a; jj = bmat->j;
1928       for (j=0; j<bmat->nz; j++) {
1929         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1930       }
1931       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1932       for (j=0; j<mat->cmap->N; j++) {
1933         if (tmp2[j] > *norm) *norm = tmp2[j];
1934       }
1935       ierr = PetscFree(tmp);CHKERRQ(ierr);
1936       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1937     } else if (type == NORM_INFINITY) { /* max row norm */
1938       PetscReal ntemp = 0.0;
1939       for (j=0; j<aij->A->rmap->n; j++) {
1940         v   = amat->a + amat->i[j];
1941         sum = 0.0;
1942         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1943           sum += PetscAbsScalar(*v); v++;
1944         }
1945         v = bmat->a + bmat->i[j];
1946         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1947           sum += PetscAbsScalar(*v); v++;
1948         }
1949         if (sum > ntemp) ntemp = sum;
1950       }
1951       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1952     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatTranspose_MPIAIJ"
1959 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1960 {
1961   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1962   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1963   PetscErrorCode ierr;
1964   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1965   PetscInt       cstart = A->cmap->rstart,ncol;
1966   Mat            B;
1967   MatScalar      *array;
1968 
1969   PetscFunctionBegin;
1970   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1971 
1972   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1973   ai = Aloc->i; aj = Aloc->j;
1974   bi = Bloc->i; bj = Bloc->j;
1975   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1976     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1977     PetscSFNode          *oloc;
1978     PETSC_UNUSED PetscSF sf;
1979 
1980     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1981     /* compute d_nnz for preallocation */
1982     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1983     for (i=0; i<ai[ma]; i++) {
1984       d_nnz[aj[i]]++;
1985       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1986     }
1987     /* compute local off-diagonal contributions */
1988     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1989     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1990     /* map those to global */
1991     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1992     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1993     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1994     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1995     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1996     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1997     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1998 
1999     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2000     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2001     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2002     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2003     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2004     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2005   } else {
2006     B    = *matout;
2007     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2008     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2009   }
2010 
2011   /* copy over the A part */
2012   array = Aloc->a;
2013   row   = A->rmap->rstart;
2014   for (i=0; i<ma; i++) {
2015     ncol = ai[i+1]-ai[i];
2016     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2017     row++;
2018     array += ncol; aj += ncol;
2019   }
2020   aj = Aloc->j;
2021   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2022 
2023   /* copy over the B part */
2024   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2025   array = Bloc->a;
2026   row   = A->rmap->rstart;
2027   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2028   cols_tmp = cols;
2029   for (i=0; i<mb; i++) {
2030     ncol = bi[i+1]-bi[i];
2031     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2032     row++;
2033     array += ncol; cols_tmp += ncol;
2034   }
2035   ierr = PetscFree(cols);CHKERRQ(ierr);
2036 
2037   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2038   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2039   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2040     *matout = B;
2041   } else {
2042     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
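/*
   Usage sketch (illustrative, not part of this source):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   MAT_REUSE_MATRIX with *matout == A performs the in-place transpose handled above, which is only
   permitted for square matrices.
*/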
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2049 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2050 {
2051   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2052   Mat            a    = aij->A,b = aij->B;
2053   PetscErrorCode ierr;
2054   PetscInt       s1,s2,s3;
2055 
2056   PetscFunctionBegin;
2057   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2058   if (rr) {
2059     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2060     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2061     /* Overlap communication with computation. */
2062     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2063   }
2064   if (ll) {
2065     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2066     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2067     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2068   }
2069   /* scale  the diagonal block */
2070   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2071 
2072   if (rr) {
2073     /* Do a scatter end and then right scale the off-diagonal block */
2074     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2075     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 #undef __FUNCT__
2081 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2082 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2083 {
2084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2085   PetscErrorCode ierr;
2086 
2087   PetscFunctionBegin;
2088   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatEqual_MPIAIJ"
2094 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2095 {
2096   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2097   Mat            a,b,c,d;
2098   PetscBool      flg;
2099   PetscErrorCode ierr;
2100 
2101   PetscFunctionBegin;
2102   a = matA->A; b = matA->B;
2103   c = matB->A; d = matB->B;
2104 
2105   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2106   if (flg) {
2107     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2108   }
2109   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatCopy_MPIAIJ"
2115 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2116 {
2117   PetscErrorCode ierr;
2118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2119   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2120 
2121   PetscFunctionBegin;
2122   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2123   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2124     /* because of the column compression in the off-processor part of the matrix a->B,
2125        the number of columns in a->B and b->B may be different, hence we cannot call
2126        the MatCopy() directly on the two parts. If need be, we can provide a more
2127        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2128        then copying the submatrices */
2129     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2130   } else {
2131     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2132     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2133   }
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatSetUp_MPIAIJ"
2139 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2140 {
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 /*
2149    Computes the number of nonzeros per row needed for preallocation when X and Y
2150    have different nonzero structure.
2151 */
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2154 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2155 {
2156   PetscInt       i,j,k,nzx,nzy;
2157 
2158   PetscFunctionBegin;
2159   /* Set the number of nonzeros in the new matrix */
2160   for (i=0; i<m; i++) {
2161     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2162     nzx = xi[i+1] - xi[i];
2163     nzy = yi[i+1] - yi[i];
2164     nnz[i] = 0;
2165     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2166       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2167       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2168       nnz[i]++;
2169     }
2170     for (; k<nzy; k++) nnz[i]++;
2171   }
2172   PetscFunctionReturn(0);
2173 }
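/*
   Worked example (illustrative): if row i of X has global columns {1,4,7} and row i of Y has
   {2,4,9}, the merged sorted pattern is {1,2,4,7,9}, so nnz[i] = 5; the shared column 4 is counted
   only once by the duplicate-skip branch above.
*/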
2174 
2175 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2178 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2179 {
2180   PetscErrorCode ierr;
2181   PetscInt       m = Y->rmap->N;
2182   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2183   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatAXPY_MPIAIJ"
2192 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2193 {
2194   PetscErrorCode ierr;
2195   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2196   PetscBLASInt   bnz,one=1;
2197   Mat_SeqAIJ     *x,*y;
2198 
2199   PetscFunctionBegin;
2200   if (str == SAME_NONZERO_PATTERN) {
2201     PetscScalar alpha = a;
2202     x    = (Mat_SeqAIJ*)xx->A->data;
2203     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2204     y    = (Mat_SeqAIJ*)yy->A->data;
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     x    = (Mat_SeqAIJ*)xx->B->data;
2207     y    = (Mat_SeqAIJ*)yy->B->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2210     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2211   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2212     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2213   } else {
2214     Mat      B;
2215     PetscInt *nnz_d,*nnz_o;
2216     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2217     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2218     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2219     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2220     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2221     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2222     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2223     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2224     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2225     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2226     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2227     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2228     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2229     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2230   }
2231   PetscFunctionReturn(0);
2232 }
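/*
   Usage sketch (illustrative, not part of this source): Y = a*X + Y.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN takes the fast BLAS axpy path above; DIFFERENT_NONZERO_PATTERN builds a
   freshly preallocated matrix for the union of the two patterns and replaces Y with it.
*/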
2233 
2234 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatConjugate_MPIAIJ"
2238 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2239 {
2240 #if defined(PETSC_USE_COMPLEX)
2241   PetscErrorCode ierr;
2242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2243 
2244   PetscFunctionBegin;
2245   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2246   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2247 #else
2248   PetscFunctionBegin;
2249 #endif
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatRealPart_MPIAIJ"
2255 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2256 {
2257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2258   PetscErrorCode ierr;
2259 
2260   PetscFunctionBegin;
2261   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2262   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2268 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2269 {
2270   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2271   PetscErrorCode ierr;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2275   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2281 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2282 {
2283   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2284   PetscErrorCode ierr;
2285   PetscInt       i,*idxb = 0;
2286   PetscScalar    *va,*vb;
2287   Vec            vtmp;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2291   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2292   if (idx) {
2293     for (i=0; i<A->rmap->n; i++) {
2294       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2295     }
2296   }
2297 
2298   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2299   if (idx) {
2300     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2301   }
2302   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2303   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2304 
2305   for (i=0; i<A->rmap->n; i++) {
2306     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2307       va[i] = vb[i];
2308       if (idx) idx[i] = a->garray[idxb[i]];
2309     }
2310   }
2311 
2312   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2313   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2314   ierr = PetscFree(idxb);CHKERRQ(ierr);
2315   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2321 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2322 {
2323   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2324   PetscErrorCode ierr;
2325   PetscInt       i,*idxb = 0;
2326   PetscScalar    *va,*vb;
2327   Vec            vtmp;
2328 
2329   PetscFunctionBegin;
2330   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2331   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2332   if (idx) {
2333     for (i=0; i<A->rmap->n; i++) {
2334       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2335     }
2336   }
2337 
2338   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2339   if (idx) {
2340     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2341   }
2342   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2343   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2344 
2345   for (i=0; i<A->rmap->n; i++) {
2346     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2347       va[i] = vb[i];
2348       if (idx) idx[i] = a->garray[idxb[i]];
2349     }
2350   }
2351 
2352   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2353   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2354   ierr = PetscFree(idxb);CHKERRQ(ierr);
2355   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2361 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2362 {
2363   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2364   PetscInt       n      = A->rmap->n;
2365   PetscInt       cstart = A->cmap->rstart;
2366   PetscInt       *cmap  = mat->garray;
2367   PetscInt       *diagIdx, *offdiagIdx;
2368   Vec            diagV, offdiagV;
2369   PetscScalar    *a, *diagA, *offdiagA;
2370   PetscInt       r;
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2375   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2376   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2377   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2378   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2379   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2380   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2381   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2382   for (r = 0; r < n; ++r) {
2383     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2384       a[r]   = diagA[r];
2385       idx[r] = cstart + diagIdx[r];
2386     } else {
2387       a[r]   = offdiagA[r];
2388       idx[r] = cmap[offdiagIdx[r]];
2389     }
2390   }
2391   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2392   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2393   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2394   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2395   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2396   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2402 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2403 {
2404   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2405   PetscInt       n      = A->rmap->n;
2406   PetscInt       cstart = A->cmap->rstart;
2407   PetscInt       *cmap  = mat->garray;
2408   PetscInt       *diagIdx, *offdiagIdx;
2409   Vec            diagV, offdiagV;
2410   PetscScalar    *a, *diagA, *offdiagA;
2411   PetscInt       r;
2412   PetscErrorCode ierr;
2413 
2414   PetscFunctionBegin;
2415   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2416   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2417   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2418   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2419   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2420   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2421   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2422   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2423   for (r = 0; r < n; ++r) {
2424     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2425       a[r]   = diagA[r];
2426       idx[r] = cstart + diagIdx[r];
2427     } else {
2428       a[r]   = offdiagA[r];
2429       idx[r] = cmap[offdiagIdx[r]];
2430     }
2431   }
2432   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2433   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2434   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2435   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2436   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2437   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2443 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2444 {
2445   PetscErrorCode ierr;
2446   Mat            *dummy;
2447 
2448   PetscFunctionBegin;
2449   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2450   *newmat = *dummy;
2451   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2457 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2458 {
2459   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2464   A->errortype = a->A->errortype;
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2470 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2471 {
2472   PetscErrorCode ierr;
2473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2474 
2475   PetscFunctionBegin;
2476   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2477   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2478   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2479   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2485 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2486 {
2487   PetscFunctionBegin;
2488   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2489   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #undef __FUNCT__
2494 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2495 /*@
2496    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2497 
2498    Collective on Mat
2499 
2500    Input Parameters:
2501 +    A - the matrix
2502 -    sc - PETSC_TRUE to use the scalable algorithm (by default the non-scalable algorithm is used)
2503 
2504  Level: advanced
2505 
2506 @*/
2507 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2508 {
2509   PetscErrorCode       ierr;
2510 
2511   PetscFunctionBegin;
2512   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
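/*
   Usage sketch (illustrative, not part of this source): the scalable overlap algorithm can be
   selected either programmatically,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or with the command-line option -mat_increase_overlap_scalable handled by
   MatSetFromOptions_MPIAIJ() below.
*/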
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2518 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2519 {
2520   PetscErrorCode       ierr;
2521   PetscBool            sc = PETSC_FALSE,flg;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2525   ierr = PetscObjectOptionsBegin((PetscObject)A);
2526     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2527     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2528     if (flg) {
2529       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2530     }
2531   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatShift_MPIAIJ"
2537 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2538 {
2539   PetscErrorCode ierr;
2540   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2541   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2542 
2543   PetscFunctionBegin;
2544   if (!Y->preallocated) {
2545     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2546   } else if (!aij->nz) {
2547     PetscInt nonew = aij->nonew;
2548     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2549     aij->nonew = nonew;
2550   }
2551   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2557 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2558 {
2559   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2560   PetscErrorCode ierr;
2561 
2562   PetscFunctionBegin;
2563   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2564   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2565   if (d) {
2566     PetscInt rstart;
2567     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2568     *d += rstart;
2569 
2570   }
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 
2575 /* -------------------------------------------------------------------*/
2576 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2577                                        MatGetRow_MPIAIJ,
2578                                        MatRestoreRow_MPIAIJ,
2579                                        MatMult_MPIAIJ,
2580                                 /* 4*/ MatMultAdd_MPIAIJ,
2581                                        MatMultTranspose_MPIAIJ,
2582                                        MatMultTransposeAdd_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                 /*10*/ 0,
2587                                        0,
2588                                        0,
2589                                        MatSOR_MPIAIJ,
2590                                        MatTranspose_MPIAIJ,
2591                                 /*15*/ MatGetInfo_MPIAIJ,
2592                                        MatEqual_MPIAIJ,
2593                                        MatGetDiagonal_MPIAIJ,
2594                                        MatDiagonalScale_MPIAIJ,
2595                                        MatNorm_MPIAIJ,
2596                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2597                                        MatAssemblyEnd_MPIAIJ,
2598                                        MatSetOption_MPIAIJ,
2599                                        MatZeroEntries_MPIAIJ,
2600                                 /*24*/ MatZeroRows_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*29*/ MatSetUp_MPIAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*34*/ MatDuplicate_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*39*/ MatAXPY_MPIAIJ,
2616                                        MatGetSubMatrices_MPIAIJ,
2617                                        MatIncreaseOverlap_MPIAIJ,
2618                                        MatGetValues_MPIAIJ,
2619                                        MatCopy_MPIAIJ,
2620                                 /*44*/ MatGetRowMax_MPIAIJ,
2621                                        MatScale_MPIAIJ,
2622                                        MatShift_MPIAIJ,
2623                                        MatDiagonalSet_MPIAIJ,
2624                                        MatZeroRowsColumns_MPIAIJ,
2625                                 /*49*/ MatSetRandom_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2631                                        0,
2632                                        MatSetUnfactored_MPIAIJ,
2633                                        MatPermute_MPIAIJ,
2634                                        0,
2635                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2636                                        MatDestroy_MPIAIJ,
2637                                        MatView_MPIAIJ,
2638                                        0,
2639                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2640                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2641                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2646                                        MatGetRowMinAbs_MPIAIJ,
2647                                        0,
2648                                        MatSetColoring_MPIAIJ,
2649                                        0,
2650                                        MatSetValuesAdifor_MPIAIJ,
2651                                 /*75*/ MatFDColoringApply_AIJ,
2652                                        MatSetFromOptions_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                        MatFindZeroDiagonals_MPIAIJ,
2656                                 /*80*/ 0,
2657                                        0,
2658                                        0,
2659                                 /*83*/ MatLoad_MPIAIJ,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2666                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2667                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2668                                        MatPtAP_MPIAIJ_MPIAIJ,
2669                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2670                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                 /*99*/ 0,
2676                                        0,
2677                                        0,
2678                                        MatConjugate_MPIAIJ,
2679                                        0,
2680                                 /*104*/MatSetValuesRow_MPIAIJ,
2681                                        MatRealPart_MPIAIJ,
2682                                        MatImaginaryPart_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                 /*109*/0,
2686                                        0,
2687                                        MatGetRowMin_MPIAIJ,
2688                                        0,
2689                                        MatMissingDiagonal_MPIAIJ,
2690                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2691                                        0,
2692                                        MatGetGhosts_MPIAIJ,
2693                                        0,
2694                                        0,
2695                                 /*119*/0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        MatGetMultiProcBlock_MPIAIJ,
2700                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2701                                        MatGetColumnNorms_MPIAIJ,
2702                                        MatInvertBlockDiagonal_MPIAIJ,
2703                                        0,
2704                                        MatGetSubMatricesMPI_MPIAIJ,
2705                                 /*129*/0,
2706                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2707                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2708                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2709                                        0,
2710                                 /*134*/0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*139*/0,
2716                                        0,
2717                                        0,
2718                                        MatFDColoringSetUp_MPIXAIJ,
2719                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2720                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2721 };
2722 
2723 /* ----------------------------------------------------------------------------------------*/
2724 
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2727 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2728 {
2729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2730   PetscErrorCode ierr;
2731 
2732   PetscFunctionBegin;
2733   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2734   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2740 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2741 {
2742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2747   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2753 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2754 {
2755   Mat_MPIAIJ     *b;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2760   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2761   b = (Mat_MPIAIJ*)B->data;
2762 
2763   if (!B->preallocated) {
2764     /* Explicitly create 2 MATSEQAIJ matrices. */
2765     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2766     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2767     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2768     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2769     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2770     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2771     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2772     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2773     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2774     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2775   }
2776 
2777   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2778   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2779   B->preallocated = PETSC_TRUE;
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2785 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2786 {
2787   Mat            mat;
2788   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   *newmat = 0;
2793   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2794   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2795   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2796   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2797   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2798   a       = (Mat_MPIAIJ*)mat->data;
2799 
2800   mat->factortype   = matin->factortype;
2801   mat->assembled    = PETSC_TRUE;
2802   mat->insertmode   = NOT_SET_VALUES;
2803   mat->preallocated = PETSC_TRUE;
2804 
2805   a->size         = oldmat->size;
2806   a->rank         = oldmat->rank;
2807   a->donotstash   = oldmat->donotstash;
2808   a->roworiented  = oldmat->roworiented;
2809   a->rowindices   = 0;
2810   a->rowvalues    = 0;
2811   a->getrowactive = PETSC_FALSE;
2812 
2813   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2814   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2815 
2816   if (oldmat->colmap) {
2817 #if defined(PETSC_USE_CTABLE)
2818     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2819 #else
2820     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2821     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2822     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2823 #endif
2824   } else a->colmap = 0;
2825   if (oldmat->garray) {
2826     PetscInt len;
2827     len  = oldmat->B->cmap->n;
2828     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2829     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2830     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2831   } else a->garray = 0;
2832 
2833   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2834   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2835   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2836   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2837   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2838   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2839   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2840   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2841   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2842   *newmat = mat;
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 
2847 
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatLoad_MPIAIJ"
2850 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2851 {
2852   PetscScalar    *vals,*svals;
2853   MPI_Comm       comm;
2854   PetscErrorCode ierr;
2855   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2856   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2857   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2858   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2859   PetscInt       cend,cstart,n,*rowners;
2860   int            fd;
2861   PetscInt       bs = newMat->rmap->bs;
2862 
2863   PetscFunctionBegin;
2864   /* force binary viewer to load .info file if it has not yet done so */
2865   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2866   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2867   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2868   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2869   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2870   if (!rank) {
2871     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2872     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2873   }
2874 
2875   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2876   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2877   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2878   if (bs < 0) bs = 1;
2879 
2880   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2881   M    = header[1]; N = header[2];
2882 
2883   /* If global sizes are set, check that they are consistent with those given in the file */
2884   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2885   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2886 
2887   /* determine ownership of all (block) rows */
2888   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2889   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2890   else m = newMat->rmap->n; /* Set by user */
2891 
2892   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2893   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2894 
2895   /* First process needs enough room for process with most rows */
2896   if (!rank) {
2897     mmax = rowners[1];
2898     for (i=2; i<=size; i++) {
2899       mmax = PetscMax(mmax, rowners[i]);
2900     }
2901   } else mmax = -1;             /* unused, but compilers complain */
2902 
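  /* turn the gathered per-process local row counts (stored in rowners[1..size]) into a prefix sum: rowners[p] becomes the first global row owned by process p */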
2903   rowners[0] = 0;
2904   for (i=2; i<=size; i++) {
2905     rowners[i] += rowners[i-1];
2906   }
2907   rstart = rowners[rank];
2908   rend   = rowners[rank+1];
2909 
2910   /* distribute row lengths to all processors */
2911   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2912   if (!rank) {
2913     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2914     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2915     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2916     for (j=0; j<m; j++) {
2917       procsnz[0] += ourlens[j];
2918     }
2919     for (i=1; i<size; i++) {
2920       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2921       /* calculate the number of nonzeros on each processor */
2922       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2923         procsnz[i] += rowlengths[j];
2924       }
2925       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2926     }
2927     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2928   } else {
2929     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2930   }
2931 
2932   if (!rank) {
2933     /* determine max buffer needed and allocate it */
2934     maxnz = 0;
2935     for (i=0; i<size; i++) {
2936       maxnz = PetscMax(maxnz,procsnz[i]);
2937     }
2938     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2939 
2940     /* read in my part of the matrix column indices  */
2941     nz   = procsnz[0];
2942     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2943     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2944 
2945     /* read in everyone else's parts and ship them off */
2946     for (i=1; i<size; i++) {
2947       nz   = procsnz[i];
2948       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2949       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2950     }
2951     ierr = PetscFree(cols);CHKERRQ(ierr);
2952   } else {
2953     /* determine buffer space needed for message */
2954     nz = 0;
2955     for (i=0; i<m; i++) {
2956       nz += ourlens[i];
2957     }
2958     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2959 
2960     /* receive message of column indices*/
2961     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2962   }
2963 
2964   /* determine column ownership if matrix is not square */
2965   if (N != M) {
2966     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2967     else n = newMat->cmap->n;
2968     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2969     cstart = cend - n;
2970   } else {
2971     cstart = rstart;
2972     cend   = rend;
2973     n      = cend - cstart;
2974   }
2975 
2976   /* loop over local rows, determining number of off diagonal entries */
2977   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2978   jj   = 0;
2979   for (i=0; i<m; i++) {
2980     for (j=0; j<ourlens[i]; j++) {
2981       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2982       jj++;
2983     }
2984   }
2985 
2986   for (i=0; i<m; i++) {
2987     ourlens[i] -= offlens[i];
2988   }
2989   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2990 
2991   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2992 
2993   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2994 
2995   for (i=0; i<m; i++) {
2996     ourlens[i] += offlens[i];
2997   }
2998 
2999   if (!rank) {
3000     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3001 
3002     /* read in my part of the matrix numerical values  */
3003     nz   = procsnz[0];
3004     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3005 
3006     /* insert into matrix */
3007     jj      = rstart;
3008     smycols = mycols;
3009     svals   = vals;
3010     for (i=0; i<m; i++) {
3011       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3012       smycols += ourlens[i];
3013       svals   += ourlens[i];
3014       jj++;
3015     }
3016 
3017     /* read in other processors and ship out */
3018     for (i=1; i<size; i++) {
3019       nz   = procsnz[i];
3020       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3021       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3022     }
3023     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3024   } else {
3025     /* receive numeric values */
3026     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3027 
3028     /* receive message of values*/
3029     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3030 
3031     /* insert into matrix */
3032     jj      = rstart;
3033     smycols = mycols;
3034     svals   = vals;
3035     for (i=0; i<m; i++) {
3036       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3037       smycols += ourlens[i];
3038       svals   += ourlens[i];
3039       jj++;
3040     }
3041   }
3042   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3043   ierr = PetscFree(vals);CHKERRQ(ierr);
3044   ierr = PetscFree(mycols);CHKERRQ(ierr);
3045   ierr = PetscFree(rowners);CHKERRQ(ierr);
3046   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3047   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3053 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3054 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3055 {
3056   PetscErrorCode ierr;
3057   IS             iscol_local;
3058   PetscInt       csize;
3059 
3060   PetscFunctionBegin;
3061   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3062   if (call == MAT_REUSE_MATRIX) {
3063     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3064     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3065   } else {
3066     /* check if we are grabbing all columns*/
3067     PetscBool    isstride;
3068     PetscMPIInt  lisstride = 0,gisstride;
3069     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3070     if (isstride) {
3071       PetscInt  start,len,mstart,mlen;
3072       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3073       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3074       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3075       if (mstart == start && mlen-mstart == len) lisstride = 1;
3076     }
3077     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3078     if (gisstride) {
3079       PetscInt N;
3080       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3081       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3082       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3083       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3084     } else {
3085       PetscInt cbs;
3086       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3087       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3088       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3089     }
3090   }
3091   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3092   if (call == MAT_INITIAL_MATRIX) {
3093     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3094     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3095   }
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3100 #undef __FUNCT__
3101 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3102 /*
3103     Not great since it makes two copies of the submatrix: first a SeqAIJ
3104   submatrix on each process, and then the final result assembled by concatenating
3105   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3106 
3107   Note: This requires a sequential iscol with all indices.
3108 */
3109 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3110 {
3111   PetscErrorCode ierr;
3112   PetscMPIInt    rank,size;
3113   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3114   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3115   PetscBool      allcolumns, colflag;
3116   Mat            M,Mreuse;
3117   MatScalar      *vwork,*aa;
3118   MPI_Comm       comm;
3119   Mat_SeqAIJ     *aij;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3123   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3125 
3126   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3127   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3128   if (colflag && ncol == mat->cmap->N) {
3129     allcolumns = PETSC_TRUE;
3130     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3131   } else {
3132     allcolumns = PETSC_FALSE;
3133   }
3134   if (call ==  MAT_REUSE_MATRIX) {
3135     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3136     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3137     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3138   } else {
3139     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3140   }
3141 
3142   /*
3143       m - number of local rows
3144       n - number of columns (same on all processors)
3145       rstart - first row in new global matrix generated
3146   */
3147   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3148   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3149   if (call == MAT_INITIAL_MATRIX) {
3150     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3151     ii  = aij->i;
3152     jj  = aij->j;
3153 
3154     /*
3155         Determine the number of non-zeros in the diagonal and off-diagonal
3156         portions of the matrix in order to do correct preallocation
3157     */
3158 
3159     /* first get start and end of "diagonal" columns */
3160     if (csize == PETSC_DECIDE) {
3161       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3162       if (mglobal == n) { /* square matrix */
3163         nlocal = m;
3164       } else {
3165         nlocal = n/size + ((n % size) > rank);
3166       }
3167     } else {
3168       nlocal = csize;
3169     }
3170     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3171     rstart = rend - nlocal;
3172     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3173 
3174     /* next, compute all the lengths */
3175     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3176     olens = dlens + m;
3177     for (i=0; i<m; i++) {
3178       jend = ii[i+1] - ii[i];
3179       olen = 0;
3180       dlen = 0;
3181       for (j=0; j<jend; j++) {
3182         if (*jj < rstart || *jj >= rend) olen++;
3183         else dlen++;
3184         jj++;
3185       }
3186       olens[i] = olen;
3187       dlens[i] = dlen;
3188     }
3189     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3190     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3191     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3192     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3193     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3194     ierr = PetscFree(dlens);CHKERRQ(ierr);
3195   } else {
3196     PetscInt ml,nl;
3197 
3198     M    = *newmat;
3199     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3200     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3201     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3202     /*
3203          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3204        rather than the slower MatSetValues().
3205     */
3206     M->was_assembled = PETSC_TRUE;
3207     M->assembled     = PETSC_FALSE;
3208   }
3209   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3210   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3211   ii   = aij->i;
3212   jj   = aij->j;
3213   aa   = aij->a;
3214   for (i=0; i<m; i++) {
3215     row   = rstart + i;
3216     nz    = ii[i+1] - ii[i];
3217     cwork = jj;     jj += nz;
3218     vwork = aa;     aa += nz;
3219     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3220   }
3221 
3222   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3223   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3224   *newmat = M;
3225 
3226   /* save submatrix used in processor for next request */
3227   if (call ==  MAT_INITIAL_MATRIX) {
3228     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3229     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3230   }
3231   PetscFunctionReturn(0);
3232 }
3233 
3234 #undef __FUNCT__
3235 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3236 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3237 {
3238   PetscInt       m,cstart, cend,j,nnz,i,d;
3239   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3240   const PetscInt *JJ;
3241   PetscScalar    *values;
3242   PetscErrorCode ierr;
3243 
3244   PetscFunctionBegin;
3245   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3246 
3247   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3248   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3249   m      = B->rmap->n;
3250   cstart = B->cmap->rstart;
3251   cend   = B->cmap->rend;
3252   rstart = B->rmap->rstart;
3253 
3254   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3255 
3256 #if defined(PETSC_USE_DEBUG)
3257   for (i=0; i<m; i++) {
3258     nnz = Ii[i+1]- Ii[i];
3259     JJ  = J + Ii[i];
3260     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3261     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3262     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3263   }
3264 #endif
3265 
3266   for (i=0; i<m; i++) {
3267     nnz     = Ii[i+1]- Ii[i];
3268     JJ      = J + Ii[i];
3269     nnz_max = PetscMax(nnz_max,nnz);
3270     d       = 0;
3271     for (j=0; j<nnz; j++) {
3272       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3273     }
3274     d_nnz[i] = d;
3275     o_nnz[i] = nnz - d;
3276   }
3277   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3278   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3279 
3280   if (v) values = (PetscScalar*)v;
3281   else {
3282     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3283   }
3284 
3285   for (i=0; i<m; i++) {
3286     ii   = i + rstart;
3287     nnz  = Ii[i+1]- Ii[i];
3288     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3289   }
3290   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3291   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292 
3293   if (!v) {
3294     ierr = PetscFree(values);CHKERRQ(ierr);
3295   }
3296   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3302 /*@
3303    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3304    (the default parallel PETSc format).
3305 
3306    Collective on MPI_Comm
3307 
3308    Input Parameters:
3309 +  B - the matrix
3310 .  i - the indices into j for the start of each local row (starts with zero)
3311 .  j - the column indices for each local row (starts with zero)
3312 -  v - optional values in the matrix
3313 
3314    Level: developer
3315 
3316    Notes:
3317        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3318      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3319      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3320 
3321        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3322 
3323        The format used for the sparse matrix input is equivalent to a
3324     row-major ordering; i.e., for the following matrix, the input data expected is
3325     as shown
3326 
3327 $        1 0 0
3328 $        2 0 3     P0
3329 $       -------
3330 $        4 5 6     P1
3331 $
3332 $     Process0 [P0]: rows_owned=[0,1]
3333 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3334 $        j =  {0,0,2}  [size = 3]
3335 $        v =  {1,2,3}  [size = 3]
3336 $
3337 $     Process1 [P1]: rows_owned=[2]
3338 $        i =  {0,3}    [size = nrow+1  = 1+1]
3339 $        j =  {0,1,2}  [size = 3]
3340 $        v =  {4,5,6}  [size = 3]
3341 
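   A minimal calling sketch for the two processes above (assuming B has already been created with MatCreate()/MatSetType(B,MATMPIAIJ) and given local sizes matching the row ownership; error checking is elided):

.vb
   /* on process 0 */
   PetscInt    i0[] = {0,1,3},  j0[] = {0,0,2};
   PetscScalar v0[] = {1,2,3};
   ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);

   /* on process 1 */
   PetscInt    i1[] = {0,3},    j1[] = {0,1,2};
   PetscScalar v1[] = {4,5,6};
   ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);
.ve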
3342 .keywords: matrix, aij, compressed row, sparse, parallel
3343 
3344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3345           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3346 @*/
3347 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3348 {
3349   PetscErrorCode ierr;
3350 
3351   PetscFunctionBegin;
3352   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 #undef __FUNCT__
3357 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3358 /*@C
3359    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3360    (the default parallel PETSc format).  For good matrix assembly performance
3361    the user should preallocate the matrix storage by setting the parameters
3362    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3363    performance can be increased by more than a factor of 50.
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  B - the matrix
3369 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3370            (same value is used for all local rows)
3371 .  d_nnz - array containing the number of nonzeros in the various rows of the
3372            DIAGONAL portion of the local submatrix (possibly different for each row)
3373            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3374            The size of this array is equal to the number of local rows, i.e 'm'.
3375            For matrices that will be factored, you must leave room for (and set)
3376            the diagonal entry even if it is zero.
3377 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3378            submatrix (same value is used for all local rows).
3379 -  o_nnz - array containing the number of nonzeros in the various rows of the
3380            OFF-DIAGONAL portion of the local submatrix (possibly different for
3381            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3382            structure. The size of this array is equal to the number
3383            of local rows, i.e 'm'.
3384 
3385    If the *_nnz parameter is given then the *_nz parameter is ignored
3386 
3387    The AIJ format (also called the Yale sparse matrix format or
3388    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3389    storage.  The stored row and column indices begin with zero.
3390    See Users-Manual: ch_mat for details.
3391 
3392    The parallel matrix is partitioned such that the first m0 rows belong to
3393    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3394    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3395 
3396    The DIAGONAL portion of the local submatrix of a processor can be defined
3397    as the submatrix which is obtained by extracting the part corresponding to
3398    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3399    first row that belongs to the processor, r2 is the last row belonging to
3400    this processor, and c1-c2 is the range of indices of the local part of a
3401    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3402    common case of a square matrix, the row and column ranges are the same and
3403    the DIAGONAL part is also square. The remaining portion of the local
3404    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3405 
3406    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3407 
3408    You can call MatGetInfo() to get information on how effective the preallocation was;
3409    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3410    You can also run with the option -info and look for messages with the string
3411    malloc in them to see if additional memory allocation was needed.
3412 
3413    Example usage:
3414 
3415    Consider the following 8x8 matrix with 34 non-zero values, that is
3416    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3417    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3418    as follows:
3419 
3420 .vb
3421             1  2  0  |  0  3  0  |  0  4
3422     Proc0   0  5  6  |  7  0  0  |  8  0
3423             9  0 10  | 11  0  0  | 12  0
3424     -------------------------------------
3425            13  0 14  | 15 16 17  |  0  0
3426     Proc1   0 18  0  | 19 20 21  |  0  0
3427             0  0  0  | 22 23  0  | 24  0
3428     -------------------------------------
3429     Proc2  25 26 27  |  0  0 28  | 29  0
3430            30  0  0  | 31 32 33  |  0 34
3431 .ve
3432 
3433    This can be represented as a collection of submatrices as:
3434 
3435 .vb
3436       A B C
3437       D E F
3438       G H I
3439 .ve
3440 
3441    Where the submatrices A,B,C are owned by proc0, D,E,F are
3442    owned by proc1, G,H,I are owned by proc2.
3443 
3444    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3445    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3446    The 'M','N' parameters are 8,8, and have the same values on all procs.
3447 
3448    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3449    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3450    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3451    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3452    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3453    matrix, and [DF] as another SeqAIJ matrix.
3454 
3455    When d_nz, o_nz parameters are specified, d_nz storage elements are
3456    allocated for every row of the local diagonal submatrix, and o_nz
3457    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3458    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3459    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3460    In this case, the values of d_nz,o_nz are:
3461 .vb
3462      proc0 : dnz = 2, o_nz = 2
3463      proc1 : dnz = 3, o_nz = 2
3464      proc2 : dnz = 1, o_nz = 4
3465 .ve
3466    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3467    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3468    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3469    34 values.
3470 
3471    When d_nnz, o_nnz parameters are specified, the storage is specified
3472    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3473    In the above case the values for d_nnz,o_nnz are:
3474 .vb
3475      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3476      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3477      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3478 .ve
3479    Here the space allocated is the sum of all the above values, i.e. 34, and
3480    hence the preallocation is perfect.
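   A minimal sketch of how proc1 above might supply these per-row counts (assuming B is a MATMPIAIJ matrix with 3 local rows; error checking is elided):

.vb
   PetscInt d_nnz[] = {3,3,2};   /* nonzeros per local row in the DIAGONAL block     */
   PetscInt o_nnz[] = {2,1,1};   /* nonzeros per local row in the OFF-DIAGONAL block */
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve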
3481 
3482    Level: intermediate
3483 
3484 .keywords: matrix, aij, compressed row, sparse, parallel
3485 
3486 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3487           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3488 @*/
3489 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3490 {
3491   PetscErrorCode ierr;
3492 
3493   PetscFunctionBegin;
3494   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3495   PetscValidType(B,1);
3496   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3497   PetscFunctionReturn(0);
3498 }
3499 
3500 #undef __FUNCT__
3501 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3502 /*@
3503      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3504          CSR format the local rows.
3505 
3506    Collective on MPI_Comm
3507 
3508    Input Parameters:
3509 +  comm - MPI communicator
3510 .  m - number of local rows (Cannot be PETSC_DECIDE)
3511 .  n - This value should be the same as the local size used in creating the
3512        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3513        calculated if N is given) For square matrices n is almost always m.
3514 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3515 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3516 .   i - row indices
3517 .   j - column indices
3518 -   a - matrix values
3519 
3520    Output Parameter:
3521 .   mat - the matrix
3522 
3523    Level: intermediate
3524 
3525    Notes:
3526        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3527      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3528      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3529 
3530        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3531 
3532        The format used for the sparse matrix input is equivalent to a
3533     row-major ordering; i.e., for the following matrix, the input data expected is
3534     as shown
3535 
3536 $        1 0 0
3537 $        2 0 3     P0
3538 $       -------
3539 $        4 5 6     P1
3540 $
3541 $     Process0 [P0]: rows_owned=[0,1]
3542 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3543 $        j =  {0,0,2}  [size = 3]
3544 $        v =  {1,2,3}  [size = 3]
3545 $
3546 $     Process1 [P1]: rows_owned=[2]
3547 $        i =  {0,3}    [size = nrow+1  = 1+1]
3548 $        j =  {0,1,2}  [size = 3]
3549 $        v =  {4,5,6}  [size = 3]
3550 
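   A minimal calling sketch for process 0 above (comm is the communicator shared by both processes; process 1 makes the analogous call with its own arrays and m = 1; error checking is elided):

.vb
   PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   Mat         A;
   ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve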
3551 .keywords: matrix, aij, compressed row, sparse, parallel
3552 
3553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3554           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3555 @*/
3556 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3557 {
3558   PetscErrorCode ierr;
3559 
3560   PetscFunctionBegin;
3561   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3562   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3563   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3564   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3565   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3566   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3567   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 #undef __FUNCT__
3572 #define __FUNCT__ "MatCreateAIJ"
3573 /*@C
3574    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3575    (the default parallel PETSc format).  For good matrix assembly performance
3576    the user should preallocate the matrix storage by setting the parameters
3577    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3578    performance can be increased by more than a factor of 50.
3579 
3580    Collective on MPI_Comm
3581 
3582    Input Parameters:
3583 +  comm - MPI communicator
3584 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3585            This value should be the same as the local size used in creating the
3586            y vector for the matrix-vector product y = Ax.
3587 .  n - This value should be the same as the local size used in creating the
3588        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3589        calculated if N is given) For square matrices n is almost always m.
3590 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3591 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3592 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3593            (same value is used for all local rows)
3594 .  d_nnz - array containing the number of nonzeros in the various rows of the
3595            DIAGONAL portion of the local submatrix (possibly different for each row)
3596            or NULL, if d_nz is used to specify the nonzero structure.
3597            The size of this array is equal to the number of local rows, i.e 'm'.
3598 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3599            submatrix (same value is used for all local rows).
3600 -  o_nnz - array containing the number of nonzeros in the various rows of the
3601            OFF-DIAGONAL portion of the local submatrix (possibly different for
3602            each row) or NULL, if o_nz is used to specify the nonzero
3603            structure. The size of this array is equal to the number
3604            of local rows, i.e 'm'.
3605 
3606    Output Parameter:
3607 .  A - the matrix
3608 
3609    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3610    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3611    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3612 
3613    Notes:
3614    If the *_nnz parameter is given then the *_nz parameter is ignored
3615 
3616    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3617    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3618    storage requirements for this matrix.
3619 
3620    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3621    processor then it must be used on all processors that share the object for
3622    that argument.
3623 
3624    The user MUST specify either the local or global matrix dimensions
3625    (possibly both).
3626 
3627    The parallel matrix is partitioned across processors such that the
3628    first m0 rows belong to process 0, the next m1 rows belong to
3629    process 1, the next m2 rows belong to process 2 etc.. where
3630    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3631    values corresponding to [m x N] submatrix.
3632 
3633    The columns are logically partitioned with the n0 columns belonging
3634    to 0th partition, the next n1 columns belonging to the next
3635    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3636 
3637    The DIAGONAL portion of the local submatrix on any given processor
3638    is the submatrix corresponding to the rows and columns m,n
3639    corresponding to the given processor. i.e diagonal matrix on
3640    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3641    etc. The remaining portion of the local submatrix [m x (N-n)]
3642    constitute the OFF-DIAGONAL portion. The example below better
3643    illustrates this concept.
3644 
3645    For a square global matrix we define each processor's diagonal portion
3646    to be its local rows and the corresponding columns (a square submatrix);
3647    each processor's off-diagonal portion encompasses the remainder of the
3648    local matrix (a rectangular submatrix).
3649 
3650    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3651 
3652    When calling this routine with a single process communicator, a matrix of
3653    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3654    type of communicator, use the construction mechanism:
3655      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
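   Expanded, that construction sequence looks roughly as follows (a sketch only; the preallocation arguments are placeholders and error checking is elided):

.vb
   ierr = MatCreate(comm,&A);
   ierr = MatSetType(A,MATMPIAIJ);
   ierr = MatSetSizes(A,m,n,M,N);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve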
3656 
3657    By default, this format uses inodes (identical nodes) when possible.
3658    We search for consecutive rows with the same nonzero structure, thereby
3659    reusing matrix information to achieve increased efficiency.
3660 
3661    Options Database Keys:
3662 +  -mat_no_inode  - Do not use inodes
3663 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3664 -  -mat_aij_oneindex - Internally use indexing starting at 1
3665         rather than 0.  Note that when calling MatSetValues(),
3666         the user still MUST index entries starting at 0!
3667 
3668 
3669    Example usage:
3670 
3671    Consider the following 8x8 matrix with 34 non-zero values, that is
3672    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3673    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3674    as follows:
3675 
3676 .vb
3677             1  2  0  |  0  3  0  |  0  4
3678     Proc0   0  5  6  |  7  0  0  |  8  0
3679             9  0 10  | 11  0  0  | 12  0
3680     -------------------------------------
3681            13  0 14  | 15 16 17  |  0  0
3682     Proc1   0 18  0  | 19 20 21  |  0  0
3683             0  0  0  | 22 23  0  | 24  0
3684     -------------------------------------
3685     Proc2  25 26 27  |  0  0 28  | 29  0
3686            30  0  0  | 31 32 33  |  0 34
3687 .ve
3688 
3689    This can be represented as a collection of submatrices as:
3690 
3691 .vb
3692       A B C
3693       D E F
3694       G H I
3695 .ve
3696 
3697    Where the submatrices A,B,C are owned by proc0, D,E,F are
3698    owned by proc1, G,H,I are owned by proc2.
3699 
3700    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702    The 'M','N' parameters are 8,8, and have the same values on all procs.
3703 
3704    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3705    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3706    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3707    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3708    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3709    matrix, and [DF] as another SeqAIJ matrix.
3710 
3711    When d_nz, o_nz parameters are specified, d_nz storage elements are
3712    allocated for every row of the local diagonal submatrix, and o_nz
3713    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3714    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3715    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3716    In this case, the values of d_nz,o_nz are:
3717 .vb
3718      proc0 : dnz = 2, o_nz = 2
3719      proc1 : dnz = 3, o_nz = 2
3720      proc2 : dnz = 1, o_nz = 4
3721 .ve
3722    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3723    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3724    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3725    34 values.
3726 
3727    When d_nnz, o_nnz parameters are specified, the storage is specified
3728    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3729    In the above case the values for d_nnz,o_nnz are:
3730 .vb
3731      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3732      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3733      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3734 .ve
3735    Here the space allocated is the sum of all the above values, i.e. 34, and
3736    hence the preallocation is perfect.
3737 
3738    Level: intermediate
3739 
3740 .keywords: matrix, aij, compressed row, sparse, parallel
3741 
3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3743           MPIAIJ, MatCreateMPIAIJWithArrays()
3744 @*/
3745 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3746 {
3747   PetscErrorCode ierr;
3748   PetscMPIInt    size;
3749 
3750   PetscFunctionBegin;
3751   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3752   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3753   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3754   if (size > 1) {
3755     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3756     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3757   } else {
3758     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3759     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3760   }
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3766 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3767 {
3768   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3769 
3770   PetscFunctionBegin;
3771   if (Ad)     *Ad     = a->A;
3772   if (Ao)     *Ao     = a->B;
3773   if (colmap) *colmap = a->garray;
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 #undef __FUNCT__
3778 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3779 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3780 {
3781   PetscErrorCode ierr;
3782   PetscInt       i;
3783   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3784 
3785   PetscFunctionBegin;
3786   if (coloring->ctype == IS_COLORING_GLOBAL) {
3787     ISColoringValue *allcolors,*colors;
3788     ISColoring      ocoloring;
3789 
3790     /* set coloring for diagonal portion */
3791     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3792 
3793     /* set coloring for off-diagonal portion */
3794     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3795     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3796     for (i=0; i<a->B->cmap->n; i++) {
3797       colors[i] = allcolors[a->garray[i]];
3798     }
3799     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3800     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3801     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3802     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3803   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3804     ISColoringValue *colors;
3805     PetscInt        *larray;
3806     ISColoring      ocoloring;
3807 
3808     /* set coloring for diagonal portion */
3809     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       larray[i] = i + A->cmap->rstart;
3812     }
3813     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3814     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3815     for (i=0; i<a->A->cmap->n; i++) {
3816       colors[i] = coloring->colors[larray[i]];
3817     }
3818     ierr = PetscFree(larray);CHKERRQ(ierr);
3819     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3820     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3821     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3822 
3823     /* set coloring for off-diagonal portion */
3824     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3825     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3826     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3827     for (i=0; i<a->B->cmap->n; i++) {
3828       colors[i] = coloring->colors[larray[i]];
3829     }
3830     ierr = PetscFree(larray);CHKERRQ(ierr);
3831     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3832     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3833     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3834   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3840 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3841 {
3842   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3843   PetscErrorCode ierr;
3844 
3845   PetscFunctionBegin;
3846   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3847   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
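/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the local SeqAIJ matrix inmat of every process
   on top of one another (in rank order) to form a single parallel MPIAIJ matrix *outmat.  With
   MAT_INITIAL_MATRIX the nonzero pattern is analyzed and *outmat is created and preallocated;
   with MAT_REUSE_MATRIX only the numerical values are re-inserted into the existing *outmat.
*/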
3853 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3854 {
3855   PetscErrorCode ierr;
3856   PetscInt       m,N,i,rstart,nnz,Ii;
3857   PetscInt       *indx;
3858   PetscScalar    *values;
3859 
3860   PetscFunctionBegin;
3861   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3862   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3863     PetscInt       *dnz,*onz,sum,bs,cbs;
3864 
3865     if (n == PETSC_DECIDE) {
3866       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3867     }
3868     /* Check sum(n) = N */
3869     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3870     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3871 
3872     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3873     rstart -= m;
3874 
3875     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3876     for (i=0; i<m; i++) {
3877       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3878       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3879       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3880     }
3881 
3882     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3883     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3884     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3885     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3886     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3887     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3888     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3889   }
3890 
3891   /* numeric phase */
3892   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3893   for (i=0; i<m; i++) {
3894     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3895     Ii   = i + rstart;
3896     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3897     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3898   }
3899   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3900   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatFileSplit"
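/*
   MatFileSplit - writes the locally owned rows of the parallel matrix A, as a sequential matrix,
   to the binary file <outfile>.<rank> (opened with FILE_MODE_APPEND); each MPI process produces
   its own file.
*/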
3906 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3907 {
3908   PetscErrorCode    ierr;
3909   PetscMPIInt       rank;
3910   PetscInt          m,N,i,rstart,nnz;
3911   size_t            len;
3912   const PetscInt    *indx;
3913   PetscViewer       out;
3914   char              *name;
3915   Mat               B;
3916   const PetscScalar *values;
3917 
3918   PetscFunctionBegin;
3919   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3920   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3921   /* Should this be the type of the diagonal block of A? */
3922   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3923   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3924   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3925   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3926   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3927   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3928   for (i=0; i<m; i++) {
3929     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3930     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3931     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3932   }
3933   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935 
3936   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3937   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3938   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3939   sprintf(name,"%s.%d",outfile,rank);
3940   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3941   ierr = PetscFree(name);CHKERRQ(ierr);
3942   ierr = MatView(B,out);CHKERRQ(ierr);
3943   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3944   ierr = MatDestroy(&B);CHKERRQ(ierr);
3945   PetscFunctionReturn(0);
3946 }
3947 
3948 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3949 #undef __FUNCT__
3950 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
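/*
   MatDestroy_MPIAIJ_SeqsToMPI - destructor installed on matrices built by
   MatCreateMPIAIJSumSeqAIJSymbolic(); it frees the attached Mat_Merge_SeqsToMPI support
   structure (if present) and then calls the regular MPIAIJ destructor.
*/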
3951 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3952 {
3953   PetscErrorCode      ierr;
3954   Mat_Merge_SeqsToMPI *merge;
3955   PetscContainer      container;
3956 
3957   PetscFunctionBegin;
3958   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3959   if (container) {
3960     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3961     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3962     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3963     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3964     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3965     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3966     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3967     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3968     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3969     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3970     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3971     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3972     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3973     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3974     ierr = PetscFree(merge);CHKERRQ(ierr);
3975     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3976   }
3977   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 #include <../src/mat/utils/freespace.h>
3982 #include <petscbt.h>
3983 
3984 #undef __FUNCT__
3985 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
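/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ(): every process
   sends the values of the rows of its seqmat that are owned by other processes, receives the
   corresponding contributions destined for its own rows, and inserts the accumulated sums into
   the preallocated mpimat using the communication data cached in the Mat_Merge_SeqsToMPI container.
*/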
3986 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3987 {
3988   PetscErrorCode      ierr;
3989   MPI_Comm            comm;
3990   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3991   PetscMPIInt         size,rank,taga,*len_s;
3992   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3993   PetscInt            proc,m;
3994   PetscInt            **buf_ri,**buf_rj;
3995   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3996   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3997   MPI_Request         *s_waits,*r_waits;
3998   MPI_Status          *status;
3999   MatScalar           *aa=a->a;
4000   MatScalar           **abuf_r,*ba_i;
4001   Mat_Merge_SeqsToMPI *merge;
4002   PetscContainer      container;
4003 
4004   PetscFunctionBegin;
4005   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4006   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4007 
4008   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4009   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4010 
4011   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4012   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4013 
4014   bi     = merge->bi;
4015   bj     = merge->bj;
4016   buf_ri = merge->buf_ri;
4017   buf_rj = merge->buf_rj;
4018 
4019   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4020   owners = merge->rowmap->range;
4021   len_s  = merge->len_s;
4022 
4023   /* send and recv matrix values */
4024   /*-----------------------------*/
4025   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4026   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4027 
4028   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4029   for (proc=0,k=0; proc<size; proc++) {
4030     if (!len_s[proc]) continue;
4031     i    = owners[proc];
4032     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4033     k++;
4034   }
4035 
4036   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4037   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4038   ierr = PetscFree(status);CHKERRQ(ierr);
4039 
4040   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4041   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4042 
4043   /* insert mat values of mpimat */
4044   /*----------------------------*/
4045   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4046   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4047 
4048   for (k=0; k<merge->nrecv; k++) {
4049     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4050     nrows       = *(buf_ri_k[k]);
4051     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4052     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4053   }
4054 
4055   /* set values of ba */
4056   m = merge->rowmap->n;
4057   for (i=0; i<m; i++) {
4058     arow = owners[rank] + i;
4059     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4060     bnzi = bi[i+1] - bi[i];
4061     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4062 
4063     /* add local non-zero vals of this proc's seqmat into ba */
4064     anzi   = ai[arow+1] - ai[arow];
4065     aj     = a->j + ai[arow];
4066     aa     = a->a + ai[arow];
4067     nextaj = 0;
4068     for (j=0; nextaj<anzi; j++) {
4069       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4070         ba_i[j] += aa[nextaj++];
4071       }
4072     }
4073 
4074     /* add received vals into ba */
4075     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4076       /* i-th row */
4077       if (i == *nextrow[k]) {
4078         anzi   = *(nextai[k]+1) - *nextai[k];
4079         aj     = buf_rj[k] + *(nextai[k]);
4080         aa     = abuf_r[k] + *(nextai[k]);
4081         nextaj = 0;
4082         for (j=0; nextaj<anzi; j++) {
4083           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4084             ba_i[j] += aa[nextaj++];
4085           }
4086         }
4087         nextrow[k]++; nextai[k]++;
4088       }
4089     }
4090     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4091   }
4092   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4093   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4094 
4095   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4096   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4097   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4098   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4099   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4100   PetscFunctionReturn(0);
4101 }
4102 
4103 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4104 
4105 #undef __FUNCT__
4106 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
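/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ(): exchanges the
   nonzero structure (i- and j-arrays) of the off-process rows of seqmat, merges it with the locally
   owned structure, preallocates the parallel matrix, and attaches the communication data in a
   Mat_Merge_SeqsToMPI container to *mpimat for later use by the numeric phase.
*/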
4107 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4108 {
4109   PetscErrorCode      ierr;
4110   Mat                 B_mpi;
4111   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4112   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4113   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4114   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4115   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4116   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4117   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4118   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4119   MPI_Status          *status;
4120   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4121   PetscBT             lnkbt;
4122   Mat_Merge_SeqsToMPI *merge;
4123   PetscContainer      container;
4124 
4125   PetscFunctionBegin;
4126   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4127 
4128   /* make sure it is a PETSc comm */
4129   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4131   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4132 
4133   ierr = PetscNew(&merge);CHKERRQ(ierr);
4134   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4135 
4136   /* determine row ownership */
4137   /*---------------------------------------------------------*/
4138   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4139   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4140   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4141   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4142   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4143   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4144   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4145 
4146   m      = merge->rowmap->n;
4147   owners = merge->rowmap->range;
4148 
4149   /* determine the number of messages to send, their lengths */
4150   /*---------------------------------------------------------*/
4151   len_s = merge->len_s;
4152 
4153   len          = 0; /* length of buf_si[] */
4154   merge->nsend = 0;
4155   for (proc=0; proc<size; proc++) {
4156     len_si[proc] = 0;
4157     if (proc == rank) {
4158       len_s[proc] = 0;
4159     } else {
4160       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4161       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4162     }
4163     if (len_s[proc]) {
4164       merge->nsend++;
4165       nrows = 0;
4166       for (i=owners[proc]; i<owners[proc+1]; i++) {
4167         if (ai[i+1] > ai[i]) nrows++;
4168       }
4169       len_si[proc] = 2*(nrows+1);
4170       len         += len_si[proc];
4171     }
4172   }
4173 
4174   /* determine the number and length of messages to receive for ij-structure */
4175   /*-------------------------------------------------------------------------*/
4176   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4177   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4178 
4179   /* post the Irecv of j-structure */
4180   /*-------------------------------*/
4181   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4182   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4183 
4184   /* post the Isend of j-structure */
4185   /*--------------------------------*/
4186   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4187 
4188   for (proc=0, k=0; proc<size; proc++) {
4189     if (!len_s[proc]) continue;
4190     i    = owners[proc];
4191     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4192     k++;
4193   }
4194 
4195   /* receives and sends of j-structure are complete */
4196   /*------------------------------------------------*/
4197   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4198   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4199 
4200   /* send and recv i-structure */
4201   /*---------------------------*/
4202   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4203   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4204 
4205   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4206   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4207   for (proc=0,k=0; proc<size; proc++) {
4208     if (!len_s[proc]) continue;
4209     /* form outgoing message for i-structure:
4210          buf_si[0]:                 nrows to be sent
4211                [1:nrows]:           row index (local to the receiving process)
4212                [nrows+1:2*nrows+1]: i-structure index
4213     */
4214     /*-------------------------------------------*/
4215     nrows       = len_si[proc]/2 - 1;
4216     buf_si_i    = buf_si + nrows+1;
4217     buf_si[0]   = nrows;
4218     buf_si_i[0] = 0;
4219     nrows       = 0;
4220     for (i=owners[proc]; i<owners[proc+1]; i++) {
4221       anzi = ai[i+1] - ai[i];
4222       if (anzi) {
4223         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4224         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4225         nrows++;
4226       }
4227     }
4228     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4229     k++;
4230     buf_si += len_si[proc];
4231   }
4232 
4233   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4234   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4235 
4236   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4237   for (i=0; i<merge->nrecv; i++) {
4238     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4239   }
4240 
4241   ierr = PetscFree(len_si);CHKERRQ(ierr);
4242   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4243   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4244   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4245   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4246   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4247   ierr = PetscFree(status);CHKERRQ(ierr);
4248 
4249   /* compute a local seq matrix in each processor */
4250   /*----------------------------------------------*/
4251   /* allocate bi array and free space for accumulating nonzero column info */
4252   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4253   bi[0] = 0;
4254 
4255   /* create and initialize a linked list */
4256   nlnk = N+1;
4257   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4258 
4259   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4260   len  = ai[owners[rank+1]] - ai[owners[rank]];
4261   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4262 
4263   current_space = free_space;
4264 
4265   /* determine symbolic info for each local row */
4266   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4267 
4268   for (k=0; k<merge->nrecv; k++) {
4269     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4270     nrows       = *buf_ri_k[k];
4271     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4272     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4273   }
4274 
4275   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4276   len  = 0;
4277   for (i=0; i<m; i++) {
4278     bnzi = 0;
4279     /* add local non-zero cols of this proc's seqmat into lnk */
4280     arow  = owners[rank] + i;
4281     anzi  = ai[arow+1] - ai[arow];
4282     aj    = a->j + ai[arow];
4283     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4284     bnzi += nlnk;
4285     /* add received col data into lnk */
4286     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4287       if (i == *nextrow[k]) { /* i-th row */
4288         anzi  = *(nextai[k]+1) - *nextai[k];
4289         aj    = buf_rj[k] + *nextai[k];
4290         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4291         bnzi += nlnk;
4292         nextrow[k]++; nextai[k]++;
4293       }
4294     }
4295     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4296 
4297     /* if free space is not available, make more free space */
4298     if (current_space->local_remaining<bnzi) {
4299       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4300       nspacedouble++;
4301     }
4302     /* copy data into free space, then initialize lnk */
4303     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4304     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4305 
4306     current_space->array           += bnzi;
4307     current_space->local_used      += bnzi;
4308     current_space->local_remaining -= bnzi;
4309 
4310     bi[i+1] = bi[i] + bnzi;
4311   }
4312 
4313   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4314 
4315   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4316   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4317   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4318 
4319   /* create symbolic parallel matrix B_mpi */
4320   /*---------------------------------------*/
4321   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4322   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4323   if (n==PETSC_DECIDE) {
4324     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4325   } else {
4326     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4327   }
4328   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4329   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4330   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4331   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4332   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4333 
4334   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4335   B_mpi->assembled    = PETSC_FALSE;
4336   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4337   merge->bi           = bi;
4338   merge->bj           = bj;
4339   merge->buf_ri       = buf_ri;
4340   merge->buf_rj       = buf_rj;
4341   merge->coi          = NULL;
4342   merge->coj          = NULL;
4343   merge->owners_co    = NULL;
4344 
4345   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4346 
4347   /* attach the supporting struct to B_mpi for reuse */
4348   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4349   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4350   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4351   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4352   *mpimat = B_mpi;
4353 
4354   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4360 /*@C
4361       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4362                  matrices contributed by each process
4363 
4364     Collective on MPI_Comm
4365 
4366    Input Parameters:
4367 +    comm - the communicator the parallel matrix will live on
4368 .    seqmat - the input sequential matrix (one per process)
4369 .    m - number of local rows (or PETSC_DECIDE)
4370 .    n - number of local columns (or PETSC_DECIDE)
4371 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4372 
4373    Output Parameter:
4374 .    mpimat - the parallel matrix generated
4375 
4376     Level: advanced
4377 
4378    Notes:
4379      The dimensions of the sequential matrix on each process MUST be the same.
4380      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4381      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
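
   Example usage (a minimal sketch; every process supplies a SeqAIJ matrix seqmat of the same
   global dimensions):
.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve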
4382 @*/
4383 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4384 {
4385   PetscErrorCode ierr;
4386   PetscMPIInt    size;
4387 
4388   PetscFunctionBegin;
4389   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4390   if (size == 1) {
4391     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4392     if (scall == MAT_INITIAL_MATRIX) {
4393       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4394     } else {
4395       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4396     }
4397     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4398     PetscFunctionReturn(0);
4399   }
4400   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4401   if (scall == MAT_INITIAL_MATRIX) {
4402     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4403   }
4404   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4405   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 #undef __FUNCT__
4410 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4411 /*@
4412      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4413           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4414           with MatGetSize()
4415 
4416     Not Collective
4417 
4418    Input Parameters:
4419 +    A - the matrix
4420 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4421 
4422    Output Parameter:
4423 .    A_loc - the local sequential matrix generated
4424 
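   Example usage (a minimal sketch):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ...; after the values of A change, refresh with */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
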
4425     Level: developer
4426 
4427 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4428 
4429 @*/
4430 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4431 {
4432   PetscErrorCode ierr;
4433   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4434   Mat_SeqAIJ     *mat,*a,*b;
4435   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4436   MatScalar      *aa,*ba,*cam;
4437   PetscScalar    *ca;
4438   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4439   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4440   PetscBool      match;
4441   MPI_Comm       comm;
4442   PetscMPIInt    size;
4443 
4444   PetscFunctionBegin;
4445   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4446   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4447   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4448   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4449   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4450 
4451   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4452   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4453   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4454   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4455   aa = a->a; ba = b->a;
4456   if (scall == MAT_INITIAL_MATRIX) {
4457     if (size == 1) {
4458       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4459       PetscFunctionReturn(0);
4460     }
4461 
4462     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4463     ci[0] = 0;
4464     for (i=0; i<am; i++) {
4465       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4466     }
4467     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4468     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4469     k    = 0;
4470     for (i=0; i<am; i++) {
4471       ncols_o = bi[i+1] - bi[i];
4472       ncols_d = ai[i+1] - ai[i];
4473       /* off-diagonal portion of A */
4474       for (jo=0; jo<ncols_o; jo++) {
4475         col = cmap[*bj];
4476         if (col >= cstart) break;
4477         cj[k]   = col; bj++;
4478         ca[k++] = *ba++;
4479       }
4480       /* diagonal portion of A */
4481       for (j=0; j<ncols_d; j++) {
4482         cj[k]   = cstart + *aj++;
4483         ca[k++] = *aa++;
4484       }
4485       /* off-diagonal portion of A */
4486       for (j=jo; j<ncols_o; j++) {
4487         cj[k]   = cmap[*bj++];
4488         ca[k++] = *ba++;
4489       }
4490     }
4491     /* put together the new matrix */
4492     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4493     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4494     /* Since these are PETSc arrays, change flags to free them as necessary. */
4495     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4496     mat->free_a  = PETSC_TRUE;
4497     mat->free_ij = PETSC_TRUE;
4498     mat->nonew   = 0;
4499   } else if (scall == MAT_REUSE_MATRIX) {
4500     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4501     ci = mat->i; cj = mat->j; cam = mat->a;
4502     for (i=0; i<am; i++) {
4503       /* off-diagonal portion of A */
4504       ncols_o = bi[i+1] - bi[i];
4505       for (jo=0; jo<ncols_o; jo++) {
4506         col = cmap[*bj];
4507         if (col >= cstart) break;
4508         *cam++ = *ba++; bj++;
4509       }
4510       /* diagonal portion of A */
4511       ncols_d = ai[i+1] - ai[i];
4512       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4513       /* off-diagonal portion of A */
4514       for (j=jo; j<ncols_o; j++) {
4515         *cam++ = *ba++; bj++;
4516       }
4517     }
4518   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4519   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4520   PetscFunctionReturn(0);
4521 }
4522 
4523 #undef __FUNCT__
4524 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4525 /*@C
4526      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4527 
4528     Not Collective
4529 
4530    Input Parameters:
4531 +    A - the matrix
4532 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4533 -    row, col - index sets of rows and columns to extract (or NULL)
4534 
4535    Output Parameter:
4536 .    A_loc - the local sequential matrix generated
4537 
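   Example usage (a minimal sketch; passing NULL lets the routine select the local rows and the
   nonzero columns itself):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
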
4538     Level: developer
4539 
4540 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4541 
4542 @*/
4543 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4544 {
4545   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4546   PetscErrorCode ierr;
4547   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4548   IS             isrowa,iscola;
4549   Mat            *aloc;
4550   PetscBool      match;
4551 
4552   PetscFunctionBegin;
4553   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4554   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4555   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4556   if (!row) {
4557     start = A->rmap->rstart; end = A->rmap->rend;
4558     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4559   } else {
4560     isrowa = *row;
4561   }
4562   if (!col) {
4563     start = A->cmap->rstart;
4564     cmap  = a->garray;
4565     nzA   = a->A->cmap->n;
4566     nzB   = a->B->cmap->n;
4567     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4568     ncols = 0;
4569     for (i=0; i<nzB; i++) {
4570       if (cmap[i] < start) idx[ncols++] = cmap[i];
4571       else break;
4572     }
4573     imark = i;
4574     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4575     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4576     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4577   } else {
4578     iscola = *col;
4579   }
4580   if (scall != MAT_INITIAL_MATRIX) {
4581     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4582     aloc[0] = *A_loc;
4583   }
4584   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4585   *A_loc = aloc[0];
4586   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4587   if (!row) {
4588     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4589   }
4590   if (!col) {
4591     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4592   }
4593   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 #undef __FUNCT__
4598 #define __FUNCT__ "MatGetBrowsOfAcols"
4599 /*@C
4600     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4601 
4602     Collective on Mat
4603 
4604    Input Parameters:
4605 +    A,B - the matrices in mpiaij format
4606 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4607 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4608 
4609    Output Parameter:
4610 +    rowb, colb - index sets of rows and columns of B to extract
4611 -    B_seq - the sequential matrix generated
4612 
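   Example usage (a minimal sketch):
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... after the values of B change, refresh with ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
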
4613     Level: developer
4614 
4615 @*/
4616 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4617 {
4618   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4619   PetscErrorCode ierr;
4620   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4621   IS             isrowb,iscolb;
4622   Mat            *bseq=NULL;
4623 
4624   PetscFunctionBegin;
4625   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4626     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4627   }
4628   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4629 
4630   if (scall == MAT_INITIAL_MATRIX) {
4631     start = A->cmap->rstart;
4632     cmap  = a->garray;
4633     nzA   = a->A->cmap->n;
4634     nzB   = a->B->cmap->n;
4635     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4636     ncols = 0;
4637     for (i=0; i<nzB; i++) {  /* row < local row index */
4638       if (cmap[i] < start) idx[ncols++] = cmap[i];
4639       else break;
4640     }
4641     imark = i;
4642     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4643     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4644     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4645     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4646   } else {
4647     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4648     isrowb  = *rowb; iscolb = *colb;
4649     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4650     bseq[0] = *B_seq;
4651   }
4652   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4653   *B_seq = bseq[0];
4654   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4655   if (!rowb) {
4656     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4657   } else {
4658     *rowb = isrowb;
4659   }
4660   if (!colb) {
4661     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4662   } else {
4663     *colb = iscolb;
4664   }
4665   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4666   PetscFunctionReturn(0);
4667 }
4668 
4669 #undef __FUNCT__
4670 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4671 /*
4672     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4673     of the OFF-DIAGONAL portion of the local part of A
4674 
4675     Collective on Mat
4676 
4677    Input Parameters:
4678 +    A,B - the matrices in mpiaij format
4679 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4680 
4681    Output Parameter:
4682 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4683 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4684 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4685 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4686 
4687     Level: developer
4688 
4689 */
4690 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4691 {
4692   VecScatter_MPI_General *gen_to,*gen_from;
4693   PetscErrorCode         ierr;
4694   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4695   Mat_SeqAIJ             *b_oth;
4696   VecScatter             ctx =a->Mvctx;
4697   MPI_Comm               comm;
4698   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4699   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4700   PetscScalar            *rvalues,*svalues;
4701   MatScalar              *b_otha,*bufa,*bufA;
4702   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4703   MPI_Request            *rwaits = NULL,*swaits = NULL;
4704   MPI_Status             *sstatus,rstatus;
4705   PetscMPIInt            jj,size;
4706   PetscInt               *cols,sbs,rbs;
4707   PetscScalar            *vals;
4708 
4709   PetscFunctionBegin;
4710   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4711   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4712 
4713   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4714     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4715   }
4716   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4717   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4718 
4719   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4720   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4721   rvalues  = gen_from->values; /* holds the length of receiving row */
4722   svalues  = gen_to->values;   /* holds the length of sending row */
4723   nrecvs   = gen_from->n;
4724   nsends   = gen_to->n;
4725 
4726   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4727   srow    = gen_to->indices;    /* local row index to be sent */
4728   sstarts = gen_to->starts;
4729   sprocs  = gen_to->procs;
4730   sstatus = gen_to->sstatus;
4731   sbs     = gen_to->bs;
4732   rstarts = gen_from->starts;
4733   rprocs  = gen_from->procs;
4734   rbs     = gen_from->bs;
4735 
4736   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4737   if (scall == MAT_INITIAL_MATRIX) {
4738     /* i-array */
4739     /*---------*/
4740     /*  post receives */
4741     for (i=0; i<nrecvs; i++) {
4742       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4743       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4744       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4745     }
4746 
4747     /* pack the outgoing message */
4748     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4749 
4750     sstartsj[0] = 0;
4751     rstartsj[0] = 0;
4752     len         = 0; /* total length of j or a array to be sent */
4753     k           = 0;
4754     for (i=0; i<nsends; i++) {
4755       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4756       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4757       for (j=0; j<nrows; j++) {
4758         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4759         for (l=0; l<sbs; l++) {
4760           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4761 
4762           rowlen[j*sbs+l] = ncols;
4763 
4764           len += ncols;
4765           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4766         }
4767         k++;
4768       }
4769       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4770 
4771       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4772     }
4773     /* recvs and sends of i-array are completed */
4774     i = nrecvs;
4775     while (i--) {
4776       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4777     }
4778     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4779 
4780     /* allocate buffers for sending j and a arrays */
4781     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4782     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4783 
4784     /* create i-array of B_oth */
4785     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4786 
4787     b_othi[0] = 0;
4788     len       = 0; /* total length of j or a array to be received */
4789     k         = 0;
4790     for (i=0; i<nrecvs; i++) {
4791       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4792       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4793       for (j=0; j<nrows; j++) {
4794         b_othi[k+1] = b_othi[k] + rowlen[j];
4795         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4796         k++;
4797       }
4798       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4799     }
4800 
4801     /* allocate space for the j and a arrays of B_oth */
4802     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4803     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4804 
4805     /* j-array */
4806     /*---------*/
4807     /*  post receives of j-array */
4808     for (i=0; i<nrecvs; i++) {
4809       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4810       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4811     }
4812 
4813     /* pack the outgoing message j-array */
4814     k = 0;
4815     for (i=0; i<nsends; i++) {
4816       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4817       bufJ  = bufj+sstartsj[i];
4818       for (j=0; j<nrows; j++) {
4819         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4820         for (ll=0; ll<sbs; ll++) {
4821           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4822           for (l=0; l<ncols; l++) {
4823             *bufJ++ = cols[l];
4824           }
4825           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4826         }
4827       }
4828       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4829     }
4830 
4831     /* recvs and sends of j-array are completed */
4832     i = nrecvs;
4833     while (i--) {
4834       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4835     }
4836     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4837   } else if (scall == MAT_REUSE_MATRIX) {
4838     sstartsj = *startsj_s;
4839     rstartsj = *startsj_r;
4840     bufa     = *bufa_ptr;
4841     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4842     b_otha   = b_oth->a;
4843   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4844 
4845   /* a-array */
4846   /*---------*/
4847   /*  post receives of a-array */
4848   for (i=0; i<nrecvs; i++) {
4849     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4850     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4851   }
4852 
4853   /* pack the outgoing message a-array */
4854   k = 0;
4855   for (i=0; i<nsends; i++) {
4856     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4857     bufA  = bufa+sstartsj[i];
4858     for (j=0; j<nrows; j++) {
4859       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4860       for (ll=0; ll<sbs; ll++) {
4861         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4862         for (l=0; l<ncols; l++) {
4863           *bufA++ = vals[l];
4864         }
4865         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4866       }
4867     }
4868     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4869   }
4870   /* recvs and sends of a-array are completed */
4871   i = nrecvs;
4872   while (i--) {
4873     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4874   }
4875   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4876   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4877 
4878   if (scall == MAT_INITIAL_MATRIX) {
4879     /* put together the new matrix */
4880     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4881 
4882     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4883     /* Since these are PETSc arrays, change flags to free them as necessary. */
4884     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4885     b_oth->free_a  = PETSC_TRUE;
4886     b_oth->free_ij = PETSC_TRUE;
4887     b_oth->nonew   = 0;
4888 
4889     ierr = PetscFree(bufj);CHKERRQ(ierr);
4890     if (!startsj_s || !bufa_ptr) {
4891       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4892       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4893     } else {
4894       *startsj_s = sstartsj;
4895       *startsj_r = rstartsj;
4896       *bufa_ptr  = bufa;
4897     }
4898   }
4899   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatGetCommunicationStructs"
4905 /*@C
4906   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4907 
4908   Not Collective
4909 
4910   Input Parameters:
4911 . A - The matrix in mpiaij format
4912 
4913   Output Parameter:
4914 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4915 . colmap - A map from global column index to local index into lvec
4916 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4917 
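  Example usage (a minimal sketch; all three output pointers must be non-NULL):
.vb
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    Vec        lvec;
    VecScatter Mvctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
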
4918   Level: developer
4919 
4920 @*/
4921 #if defined(PETSC_USE_CTABLE)
4922 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4923 #else
4924 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4925 #endif
4926 {
4927   Mat_MPIAIJ *a;
4928 
4929   PetscFunctionBegin;
4930   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4931   PetscValidPointer(lvec, 2);
4932   PetscValidPointer(colmap, 3);
4933   PetscValidPointer(multScatter, 4);
4934   a = (Mat_MPIAIJ*) A->data;
4935   if (lvec) *lvec = a->lvec;
4936   if (colmap) *colmap = a->colmap;
4937   if (multScatter) *multScatter = a->Mvctx;
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4942 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4943 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4944 #if defined(PETSC_HAVE_ELEMENTAL)
4945 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4946 #endif
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4950 /*
4951     Computes (B'*A')' since computing B*A directly is untenable
4952 
4953                n                       p                          p
4954         (              )       (              )         (                  )
4955       m (      A       )  *  n (       B      )   =   m (         C        )
4956         (              )       (              )         (                  )
4957 
4958 */
4959 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4960 {
4961   PetscErrorCode ierr;
4962   Mat            At,Bt,Ct;
4963 
4964   PetscFunctionBegin;
4965   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4966   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4967   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4968   ierr = MatDestroy(&At);CHKERRQ(ierr);
4969   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4970   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4971   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4972   PetscFunctionReturn(0);
4973 }
4974 
4975 #undef __FUNCT__
4976 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4977 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4978 {
4979   PetscErrorCode ierr;
4980   PetscInt       m=A->rmap->n,n=B->cmap->n;
4981   Mat            Cmat;
4982 
4983   PetscFunctionBegin;
4984   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4985   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4986   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4987   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4988   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4989   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4990   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4991   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4992 
4993   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4994 
4995   *C = Cmat;
4996   PetscFunctionReturn(0);
4997 }
4998 
4999 /* ----------------------------------------------------------------*/
5000 #undef __FUNCT__
5001 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5002 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5003 {
5004   PetscErrorCode ierr;
5005 
5006   PetscFunctionBegin;
5007   if (scall == MAT_INITIAL_MATRIX) {
5008     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5009     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5010     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5011   }
5012   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5013   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5014   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 /*MC
5019    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5020 
5021    Options Database Keys:
5022 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5023 
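  Example usage (a minimal sketch; here a rough estimate of 5 diagonal and 2 off-diagonal nonzeros
  per row is used for preallocation):
.vb
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
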
5024   Level: beginner
5025 
5026 .seealso: MatCreateAIJ()
5027 M*/
5028 
5029 #undef __FUNCT__
5030 #define __FUNCT__ "MatCreate_MPIAIJ"
5031 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5032 {
5033   Mat_MPIAIJ     *b;
5034   PetscErrorCode ierr;
5035   PetscMPIInt    size;
5036 
5037   PetscFunctionBegin;
5038   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5039 
5040   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5041   B->data       = (void*)b;
5042   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5043   B->assembled  = PETSC_FALSE;
5044   B->insertmode = NOT_SET_VALUES;
5045   b->size       = size;
5046 
5047   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5048 
5049   /* build cache for off array entries formed */
5050   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5051 
5052   b->donotstash  = PETSC_FALSE;
5053   b->colmap      = 0;
5054   b->garray      = 0;
5055   b->roworiented = PETSC_TRUE;
5056 
5057   /* stuff used for matrix vector multiply */
5058   b->lvec  = NULL;
5059   b->Mvctx = NULL;
5060 
5061   /* stuff for MatGetRow() */
5062   b->rowindices   = 0;
5063   b->rowvalues    = 0;
5064   b->getrowactive = PETSC_FALSE;
5065 
5066   /* flexible pointer used in CUSP/CUSPARSE classes */
5067   b->spptr = NULL;
5068 
5069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5080 #if defined(PETSC_HAVE_ELEMENTAL)
5081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5082 #endif
5083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5087   PetscFunctionReturn(0);
5088 }
5089 
5090 #undef __FUNCT__
5091 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5092 /*@C
5093      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5094          and "off-diagonal" part of the matrix in CSR format.
5095 
5096    Collective on MPI_Comm
5097 
5098    Input Parameters:
5099 +  comm - MPI communicator
5100 .  m - number of local rows (Cannot be PETSC_DECIDE)
5101 .  n - This value should be the same as the local size used in creating the
5102        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5103        calculated if N is given). For square matrices n is almost always m.
5104 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5105 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5106 .   i - row indices for the "diagonal" portion of the matrix
5107 .   j - column indices of the "diagonal" portion, local to the diagonal block (i.e. relative to the first locally owned column)
5108 .   a - matrix values of the "diagonal" portion
5109 .   oi - row indices for the "off-diagonal" portion of the matrix
5110 .   oj - column indices of the "off-diagonal" portion, given as global column indices
5111 -   oa - matrix values of the "off-diagonal" portion
5112 
5113    Output Parameter:
5114 .   mat - the matrix
5115 
5116    Level: advanced
5117 
5118    Notes:
5119        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5120        must free these arrays only after the matrix has been destroyed, and not before.
5121 
5122        The i, j, oi, and oj indices are zero-based.
5123 
5124        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
5125 
5126        This routine sets only local rows and cannot be used to set off-processor values.
5127 
5128        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5129        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5130        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5131        the resulting assembly is easier to implement, works with any matrix format, and the user does not have to
5132        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5133        communication if it is known that only local entries will be set. An illustrative usage sketch follows this routine.
5134 
5135 .keywords: matrix, aij, compressed row, sparse, parallel
5136 
5137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5138           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5139 @*/
5140 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5141 {
5142   PetscErrorCode ierr;
5143   Mat_MPIAIJ     *maij;
5144 
5145   PetscFunctionBegin;
5146   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5147   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5148   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5149   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5150   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5151   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5152   maij = (Mat_MPIAIJ*) (*mat)->data;
5153 
5154   (*mat)->preallocated = PETSC_TRUE;
5155 
5156   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5157   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5158 
5159   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5160   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5161 
5162   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5163   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5164   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5165   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5166 
5167   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5168   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5169   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5170   PetscFunctionReturn(0);
5171 }
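
/*
   Illustrative usage sketch (hypothetical example, not part of the source): assembling the 4x4
   tridiagonal matrix tridiag(-1,2,-1) on two MPI ranks, each owning two rows and two columns.
   The diagonal-block column indices j are local to the block, while the off-diagonal column
   indices oj are global, matching the MatCreateSeqAIJWithArrays() calls above. Rank 0's arrays
   are shown; rank 1's are analogous.

     Mat         A;
     PetscInt    i[]  = {0,2,4};                row pointers of the diagonal block (rows 0,1; cols 0,1)
     PetscInt    j[]  = {0,1,0,1};              local column indices within the diagonal block
     PetscScalar a[]  = {2.0,-1.0,-1.0,2.0};
     PetscInt    oi[] = {0,0,1};                row pointers of the off-diagonal block
     PetscInt    oj[] = {2};                    global column index of the single off-diagonal entry
     PetscScalar oa[] = {-1.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; the six arrays must stay allocated until after MatDestroy(&A) ...
*/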
5172 
5173 /*
5174     Special version for direct calls from Fortran
5175 */
5176 #include <petsc/private/fortranimpl.h>
5177 
5178 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5179 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5181 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5182 #endif
5183 
5184 /* Change these macros so they can be used in a void function: the Fortran-callable wrapper below cannot return an error code, so errors abort via CHKERRABORT() instead */
5185 #undef CHKERRQ
5186 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5187 #undef SETERRQ2
5188 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5189 #undef SETERRQ3
5190 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5191 #undef SETERRQ
5192 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5193 
5194 #undef __FUNCT__
5195 #define __FUNCT__ "matsetvaluesmpiaij_"
5196 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5197 {
5198   Mat            mat  = *mmat;
5199   PetscInt       m    = *mm, n = *mn;
5200   InsertMode     addv = *maddv;
5201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5202   PetscScalar    value;
5203   PetscErrorCode ierr;
5204 
5205   MatCheckPreallocated(mat,1);
5206   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5207 
5208 #if defined(PETSC_USE_DEBUG)
5209   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5210 #endif
5211   {
5212     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5213     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5214     PetscBool roworiented = aij->roworiented;
5215 
5216     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5217     Mat        A                 = aij->A;
5218     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5219     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5220     MatScalar  *aa               = a->a;
5221     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5222     Mat        B                 = aij->B;
5223     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5224     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5225     MatScalar  *ba               = b->a;
5226 
5227     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5228     PetscInt  nonew = a->nonew;
5229     MatScalar *ap1,*ap2;
5230 
5231     PetscFunctionBegin;
5232     for (i=0; i<m; i++) {
5233       if (im[i] < 0) continue;
5234 #if defined(PETSC_USE_DEBUG)
5235       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5236 #endif
5237       if (im[i] >= rstart && im[i] < rend) {
5238         row      = im[i] - rstart;
5239         lastcol1 = -1;
5240         rp1      = aj + ai[row];
5241         ap1      = aa + ai[row];
5242         rmax1    = aimax[row];
5243         nrow1    = ailen[row];
5244         low1     = 0;
5245         high1    = nrow1;
5246         lastcol2 = -1;
5247         rp2      = bj + bi[row];
5248         ap2      = ba + bi[row];
5249         rmax2    = bimax[row];
5250         nrow2    = bilen[row];
5251         low2     = 0;
5252         high2    = nrow2;
5253 
5254         for (j=0; j<n; j++) {
5255           if (roworiented) value = v[i*n+j];
5256           else value = v[i+j*m];
5257           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5258           if (in[j] >= cstart && in[j] < cend) {
5259             col = in[j] - cstart;
5260             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5261           } else if (in[j] < 0) continue;
5262 #if defined(PETSC_USE_DEBUG)
5263           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5264 #endif
5265           else {
5266             if (mat->was_assembled) {
5267               if (!aij->colmap) {
5268                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5269               }
5270 #if defined(PETSC_USE_CTABLE)
5271               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5272               col--;
5273 #else
5274               col = aij->colmap[in[j]] - 1;
5275 #endif
5276               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5277                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5278                 col  =  in[j];
5279                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private(); ba must be refreshed before ap2 is computed, since MatDisAssemble_MPIAIJ() has replaced aij->B and its arrays */
5280                 B     = aij->B;
5281                 b     = (Mat_SeqAIJ*)B->data;
5282                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5283                 ba    = b->a;
5284                 rp2   = bj + bi[row];
5285                 ap2   = ba + bi[row];
5286                 rmax2 = bimax[row];
5287                 nrow2 = bilen[row];
5288                 low2  = 0;
5289                 high2 = nrow2;
5290                 bm    = aij->B->rmap->n;
5291               }
5292             } else col = in[j];
5293             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5294           }
5295         }
5296       } else if (!aij->donotstash) {
5297         if (roworiented) {
5298           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5299         } else {
5300           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5301         }
5302       }
5303     }
5304   }
5305   PetscFunctionReturnVoid();
5306 }
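
/*
   Conceptual sketch (hypothetical helper, not the actual MatSetValues_SeqAIJ_A_Private/_B_Private
   macros): each macro inserts (col,value) into one CSR row whose column indices are kept sorted,
   using a bounded search followed by a shift-and-insert. The real macros additionally handle the
   ignorezeroentries and nonew options, reallocation, and the nonzerostate bookkeeping seen above.
   Assuming the row already has spare preallocated capacity:

     static void InsertIntoSortedRow(PetscInt *rp,MatScalar *ap,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
     {
       PetscInt lo = 0,hi = *nrow,k;
       while (hi > lo) {                        binary search for the first index with rp[index] >= col
         PetscInt mid = lo + (hi - lo)/2;
         if (rp[mid] < col) lo = mid + 1;
         else               hi = mid;
       }
       if (lo < *nrow && rp[lo] == col) {       existing nonzero: add to it or overwrite it
         if (addv == ADD_VALUES) ap[lo] += value;
         else                    ap[lo]  = value;
         return;
       }
       for (k = (*nrow)++; k > lo; k--) {       new nonzero: shift later entries up and insert
         rp[k] = rp[k-1];
         ap[k] = ap[k-1];
       }
       rp[lo] = col;
       ap[lo] = value;
     }
*/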
5307 
5308