xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision df0b854cade274ed9a7621633a20ef3035ae0f9f)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
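
   Example (a minimal creation sketch; the sizes and preallocation counts below are illustrative):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
   ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        /* used when running on one process */
   ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); /* used when running on several processes */
.ve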
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
21    automatically switches over to use inodes when enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34    for communicators controlling multiple processes.  It is recommended that you call both of
35    the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
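  /* Accumulate each local entry's contribution into a dense work array of
     global length n, then combine across processes with a single Allreduce
     (MPI_SUM for the 1- and 2-norms, MPI_MAX for the infinity norm); note
     this requires O(N) memory per process. */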
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
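/*
   A hedged usage sketch (the names gseq and mlocal are illustrative): gseq holds
   the whole matrix, fully populated on rank 0, and mlocal is this rank's share
   of the rows.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... update the numerical values of gseq on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/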
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal nonzeros in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal nonzeros in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 has an order-N integer array) but is fast to access.
403 */
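/*
   Illustrative sketch: if garray = {4,9,11}, then after MatCreateColmap_MPIAIJ_Private()
   the local index of global column 9 is recovered as

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,9+1,&col); col--;   (col == 1)
     #else
       col = aij->colmap[9] - 1;                      (col == 1)
     #endif

   mirroring the lookups performed in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().
*/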
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
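/*
   The two macros below insert one value into a row of the diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search narrows the sorted
   column range [low,high) of the row, a linear scan locates the exact column,
   and if the column is not yet present the row is reallocated (when allowed)
   and all later entries are shifted up one slot to make room for it.
*/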
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
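/* Identical insertion logic to MatSetValues_SeqAIJ_A_Private(), applied to the off-diagonal block B */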
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
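  /* The input v is assumed ordered by increasing global column index: first the
     off-diagonal entries to the left of the diagonal block (stored in B), then
     the diagonal block entries (stored in A), then the remaining off-diagonal
     entries to the right of the diagonal block (also stored in B). */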
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
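    /* Rows never targeted by the reduction keep their -1 sentinel; targeted rows
       become nonnegative, so the pass below converts the flags into a compact
       list of local row numbers. */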
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
837     PetscBool cong;
838     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
839     if (cong) A->congruentlayouts = 1;
840     else      A->congruentlayouts = 0;
841   }
842   if ((diag != 0.0) && A->congruentlayouts) {
843     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
844   } else if (diag != 0.0) {
845     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
846     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
847     for (r = 0; r < len; ++r) {
848       const PetscInt row = lrows[r] + A->rmap->rstart;
849       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
850     }
851     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
852     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853   } else {
854     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
855   }
856   ierr = PetscFree(lrows);CHKERRQ(ierr);
857 
858   /* only change matrix nonzero state if pattern was allowed to be changed */
859   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
860     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
861     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
862   }
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
868 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
869 {
870   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       n = A->rmap->n;
873   PetscInt          i,j,r,m,p = 0,len = 0;
874   PetscInt          *lrows,*owners = A->rmap->range;
875   PetscSFNode       *rrows;
876   PetscSF           sf;
877   const PetscScalar *xx;
878   PetscScalar       *bb,*mask;
879   Vec               xmask,lmask;
880   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
881   const PetscInt    *aj, *ii,*ridx;
882   PetscScalar       *aa;
883 
884   PetscFunctionBegin;
885   /* Create SF where leaves are input rows and roots are owned rows */
886   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
887   for (r = 0; r < n; ++r) lrows[r] = -1;
888   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
889   for (r = 0; r < N; ++r) {
890     const PetscInt idx   = rows[r];
891     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
892     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
893       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
894     }
895     rrows[r].rank  = p;
896     rrows[r].index = rows[r] - owners[p];
897   }
898   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
899   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
900   /* Collect flags for rows to be zeroed */
901   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
902   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
904   /* Compress and put in row numbers */
905   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
906   /* zero diagonal part of matrix */
907   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
908   /* handle off diagonal part of matrix */
909   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
910   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
911   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
912   for (i=0; i<len; i++) bb[lrows[i]] = 1;
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
917   if (x) {
918     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922   }
923   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
924   /* remove zeroed rows of off diagonal matrix */
925   ii = aij->i;
926   for (i=0; i<len; i++) {
927     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
928   }
929   /* loop over all elements of off process part of matrix zeroing removed columns*/
930   if (aij->compressedrow.use) {
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++) {
935       n  = ii[i+1] - ii[i];
936       aj = aij->j + ii[i];
937       aa = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[*ridx] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951     for (i=0; i<m; i++) {
952       n  = ii[i+1] - ii[i];
953       aj = aij->j + ii[i];
954       aa = aij->a + ii[i];
955       for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           if (b) bb[i] -= *aa*xx[*aj];
958           *aa = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965   if (x) {
966     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
968   }
969   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
970   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
971   ierr = PetscFree(lrows);CHKERRQ(ierr);
972 
973   /* only change matrix nonzero state if pattern was allowed to be changed */
974   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
975     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
976     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMult_MPIAIJ"
983 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   PetscInt       nt;
988 
989   PetscFunctionBegin;
990   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
991   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
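  /* Overlap communication with computation: start the scatter of off-process
     entries of xx, multiply by the local diagonal block while the messages are
     in flight, then complete the scatter and apply the off-diagonal block to
     the gathered ghost values. */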
992   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
994   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
995   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1001 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1002 {
1003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1013 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1014 {
1015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1021   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1022   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1028 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1029 {
1030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1031   PetscErrorCode ierr;
1032   PetscBool      merged;
1033 
1034   PetscFunctionBegin;
1035   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1036   /* do nondiagonal part */
1037   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1038   if (!merged) {
1039     /* send it on its way */
1040     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* receive remote parts: note this assumes the values are not actually
1044        added into yy until the next line */
1045     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046   } else {
1047     /* do local part */
1048     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1049     /* send it on its way */
1050     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051     /* the values were actually received in the Begin(), but we must still call the End() as a no-op */
1052     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1059 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1060 {
1061   MPI_Comm       comm;
1062   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1063   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1064   IS             Me,Notme;
1065   PetscErrorCode ierr;
1066   PetscInt       M,N,first,last,*notme,i;
1067   PetscMPIInt    size;
1068 
1069   PetscFunctionBegin;
1070   /* Easy test: symmetric diagonal block */
1071   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1072   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1073   if (!*f) PetscFunctionReturn(0);
1074   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1075   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1076   if (size == 1) PetscFunctionReturn(0);
1077 
1078   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1079   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1080   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1081   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1082   for (i=0; i<first; i++) notme[i] = i;
1083   for (i=last; i<M; i++) notme[i-last+first] = i;
1084   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1085   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1086   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1087   Aoff = Aoffs[0];
1088   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1089   Boff = Boffs[0];
1090   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1091   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1093   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1095   ierr = PetscFree(notme);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1101 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   /* do nondiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1109   /* send it on its way */
1110   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1111   /* do local part */
1112   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   /* receive remote parts */
1114   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119   This only works correctly for square matrices where the subblock A->A is the
1120    diagonal block
1121 */
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1124 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1125 {
1126   PetscErrorCode ierr;
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128 
1129   PetscFunctionBegin;
1130   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1131   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1132   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatScale_MPIAIJ"
1138 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1139 {
1140   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1145   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatDestroy_MPIAIJ"
1151 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1152 {
1153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157 #if defined(PETSC_USE_LOG)
1158   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1159 #endif
1160   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1161   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1162   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1164 #if defined(PETSC_USE_CTABLE)
1165   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1166 #else
1167   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1168 #endif
1169   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1170   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1171   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1172   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1173   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1174   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1175 
1176   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1185 #if defined(PETSC_HAVE_ELEMENTAL)
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1187 #endif
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1193 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1194 {
1195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1196   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1197   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1198   PetscErrorCode ierr;
1199   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1200   int            fd;
1201   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1202   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1203   PetscScalar    *column_values;
1204   PetscInt       message_count,flowcontrolcount;
1205   FILE           *file;
1206 
1207   PetscFunctionBegin;
1208   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1209   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1210   nz   = A->nz + B->nz;
1211   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1212   if (!rank) {
1213     header[0] = MAT_FILE_CLASSID;
1214     header[1] = mat->rmap->N;
1215     header[2] = mat->cmap->N;
1216 
1217     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1219     /* get largest number of rows any processor has */
1220     rlen  = mat->rmap->n;
1221     range = mat->rmap->range;
1222     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1223   } else {
1224     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     rlen = mat->rmap->n;
1226   }
1227 
1228   /* load up the local row counts */
1229   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1230   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1231 
1232   /* store the row lengths to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1238       rlen = range[i+1] - range[i];
1239       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1241     }
1242     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1243   } else {
1244     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1245     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1249 
1250   /* load up the local column indices */
1251   nzmax = nz; /* rank 0 needs as much space as the largest processor needs */
1252   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1254   cnt   = 0;
1255   for (i=0; i<mat->rmap->n; i++) {
1256     for (j=B->i[i]; j<B->i[i+1]; j++) {
1257       if ((col = garray[B->j[j]]) > cstart) break;
1258       column_indices[cnt++] = col;
1259     }
1260     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1261     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1262   }
1263   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1264 
1265   /* store the column indices to the file */
1266   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1267   if (!rank) {
1268     MPI_Status status;
1269     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     for (i=1; i<size; i++) {
1271       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1272       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1273       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1274       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1285 
1286   /* load up the local column values */
1287   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1288   cnt  = 0;
1289   for (i=0; i<mat->rmap->n; i++) {
1290     for (j=B->i[i]; j<B->i[i+1]; j++) {
1291       if (garray[B->j[j]] > cstart) break;
1292       column_values[cnt++] = B->a[j];
1293     }
1294     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1295     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1296   }
1297   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1298 
1299   /* store the column values to the file */
1300   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1301   if (!rank) {
1302     MPI_Status status;
1303     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     for (i=1; i<size; i++) {
1305       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1306       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1307       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1308       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1310     }
1311     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1312   } else {
1313     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1314     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1317   }
1318   ierr = PetscFree(column_values);CHKERRQ(ierr);
1319 
1320   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1321   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1322   PetscFunctionReturn(0);
1323 }
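
/*
   Usage sketch (annotation, not part of the original source): the routine
   above is reached through the generic interface when an assembled MPIAIJ
   matrix A is viewed with a binary viewer on more than one process; the file
   name "A.dat" is an arbitrary example.

     PetscViewer bv;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&bv);CHKERRQ(ierr);
     ierr = MatView(A,bv);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&bv);CHKERRQ(ierr);
*/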
1324 
1325 #include <petscdraw.h>
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1328 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1329 {
1330   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1331   PetscErrorCode    ierr;
1332   PetscMPIInt       rank = aij->rank,size = aij->size;
1333   PetscBool         isdraw,iascii,isbinary;
1334   PetscViewer       sviewer;
1335   PetscViewerFormat format;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1341   if (iascii) {
1342     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1343     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1344       MatInfo   info;
1345       PetscBool inodes;
1346 
1347       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1348       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1349       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1351       if (!inodes) {
1352         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1353                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1354       } else {
1355         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1356                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1357       }
1358       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1359       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1360       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1362       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1363       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1365       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1366       PetscFunctionReturn(0);
1367     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1368       PetscInt inodecount,inodelimit,*inodes;
1369       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1370       if (inodes) {
1371         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1372       } else {
1373         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1374       }
1375       PetscFunctionReturn(0);
1376     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1377       PetscFunctionReturn(0);
1378     }
1379   } else if (isbinary) {
1380     if (size == 1) {
1381       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1383     } else {
1384       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1385     }
1386     PetscFunctionReturn(0);
1387   } else if (isdraw) {
1388     PetscDraw draw;
1389     PetscBool isnull;
1390     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1391     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1392     if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Every process has to participate in drawing the matrix since the graphics
1444        waits are synchronized across all processes that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573 
1574   matin->errortype = mat->A->errortype;
1575   PetscFunctionReturn(0);
1576 }
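
/*
   Note (annotation): with the MPIAIJ splitting into the diagonal block A and
   the off-diagonal block B acting on the ghosted values in lvec, each outer
   iteration of MatSOR_MPIAIJ() applies the local SOR kernel to

       A x = bb - B x_ghost      (the right-hand side bb1 computed above),

   i.e. block Jacobi across processes with SOR as the subdomain solver; a true
   globally ordered parallel SOR is not supported, as the final SETERRQ() says.
*/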
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatPermute_MPIAIJ"
1580 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1581 {
1582   Mat            aA,aB,Aperm;
1583   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1584   PetscScalar    *aa,*ba;
1585   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1586   PetscSF        rowsf,sf;
1587   IS             parcolp = NULL;
1588   PetscBool      done;
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1593   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1594   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1595   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1596 
1597   /* Invert row permutation to find out where my rows should go */
1598   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1599   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1600   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1601   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1602   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
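  /* Worked example (annotation): with two ranks owning rows {0,1} and {2,3}
     and rwant = {2,0 | 3,1}, leaf i deposits its own global index rstart+i at
     root rwant[i], so after the reduce rdest = {1,3 | 0,2}; original global
     row r is destined for global row rdest[r-rstart]. */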
1604 
1605   /* Invert column permutation to find out where my columns should go */
1606   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1607   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1608   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1609   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1610   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1611   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1613 
1614   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1615   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1616   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1617 
1618   /* Find out where my gcols should go */
1619   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1620   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1621   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1622   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1623   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1624   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1625   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1627 
1628   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1629   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1631   for (i=0; i<m; i++) {
1632     PetscInt row = rdest[i],rowner;
1633     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1634     for (j=ai[i]; j<ai[i+1]; j++) {
1635       PetscInt cowner,col = cdest[aj[j]];
1636       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1637       if (rowner == cowner) dnnz[i]++;
1638       else onnz[i]++;
1639     }
1640     for (j=bi[i]; j<bi[i+1]; j++) {
1641       PetscInt cowner,col = gcdest[bj[j]];
1642       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1643       if (rowner == cowner) dnnz[i]++;
1644       else onnz[i]++;
1645     }
1646   }
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1652 
1653   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1654   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1656   for (i=0; i<m; i++) {
1657     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1658     PetscInt j0,rowlen;
1659     rowlen = ai[i+1] - ai[i];
1660     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the scratch length m, so insert in batches */
1661       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1662       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1663     }
1664     rowlen = bi[i+1] - bi[i];
1665     for (j0=j=0; j<rowlen; j0=j) {
1666       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1667       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1668     }
1669   }
1670   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1674   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1676   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1677   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1678   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1679   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1680   *B = Aperm;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1686 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1687 {
1688   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1693   if (ghosts) *ghosts = aij->garray;
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1699 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1700 {
1701   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1702   Mat            A    = mat->A,B = mat->B;
1703   PetscErrorCode ierr;
1704   PetscReal      isend[5],irecv[5];
1705 
1706   PetscFunctionBegin;
1707   info->block_size = 1.0;
1708   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1709 
1710   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1711   isend[3] = info->memory;  isend[4] = info->mallocs;
1712 
1713   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1714 
1715   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1716   isend[3] += info->memory;  isend[4] += info->mallocs;
1717   if (flag == MAT_LOCAL) {
1718     info->nz_used      = isend[0];
1719     info->nz_allocated = isend[1];
1720     info->nz_unneeded  = isend[2];
1721     info->memory       = isend[3];
1722     info->mallocs      = isend[4];
1723   } else if (flag == MAT_GLOBAL_MAX) {
1724     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1725 
1726     info->nz_used      = irecv[0];
1727     info->nz_allocated = irecv[1];
1728     info->nz_unneeded  = irecv[2];
1729     info->memory       = irecv[3];
1730     info->mallocs      = irecv[4];
1731   } else if (flag == MAT_GLOBAL_SUM) {
1732     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1733 
1734     info->nz_used      = irecv[0];
1735     info->nz_allocated = irecv[1];
1736     info->nz_unneeded  = irecv[2];
1737     info->memory       = irecv[3];
1738     info->mallocs      = irecv[4];
1739   }
1740   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1741   info->fill_ratio_needed = 0;
1742   info->factor_mallocs    = 0;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSetOption_MPIAIJ"
1748 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1749 {
1750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   switch (op) {
1755   case MAT_NEW_NONZERO_LOCATIONS:
1756   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1757   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1758   case MAT_KEEP_NONZERO_PATTERN:
1759   case MAT_NEW_NONZERO_LOCATION_ERR:
1760   case MAT_USE_INODES:
1761   case MAT_IGNORE_ZERO_ENTRIES:
1762     MatCheckPreallocated(A,1);
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_ROW_ORIENTED:
1767     MatCheckPreallocated(A,1);
1768     a->roworiented = flg;
1769 
1770     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1771     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_NEW_DIAGONALS:
1774     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1775     break;
1776   case MAT_IGNORE_OFF_PROC_ENTRIES:
1777     a->donotstash = flg;
1778     break;
1779   case MAT_SPD:
1780     A->spd_set = PETSC_TRUE;
1781     A->spd     = flg;
1782     if (flg) {
1783       A->symmetric                  = PETSC_TRUE;
1784       A->structurally_symmetric     = PETSC_TRUE;
1785       A->symmetric_set              = PETSC_TRUE;
1786       A->structurally_symmetric_set = PETSC_TRUE;
1787     }
1788     break;
1789   case MAT_SYMMETRIC:
1790     MatCheckPreallocated(A,1);
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_STRUCTURALLY_SYMMETRIC:
1794     MatCheckPreallocated(A,1);
1795     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1796     break;
1797   case MAT_HERMITIAN:
1798     MatCheckPreallocated(A,1);
1799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_SYMMETRY_ETERNAL:
1802     MatCheckPreallocated(A,1);
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     break;
1805   default:
1806     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatGetRow_MPIAIJ"
1813 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1814 {
1815   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1816   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1817   PetscErrorCode ierr;
1818   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1819   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1820   PetscInt       *cmap,*idx_p;
1821 
1822   PetscFunctionBegin;
1823   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1824   mat->getrowactive = PETSC_TRUE;
1825 
1826   if (!mat->rowvalues && (idx || v)) {
1827     /*
1828         allocate enough space to hold information from the longest row.
1829     */
1830     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1831     PetscInt   max = 1,tmp;
1832     for (i=0; i<matin->rmap->n; i++) {
1833       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1834       if (max < tmp) max = tmp;
1835     }
1836     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1837   }
1838 
1839   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1840   lrow = row - rstart;
1841 
1842   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1843   if (!v)   {pvA = 0; pvB = 0;}
1844   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1845   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1846   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1847   nztot = nzA + nzB;
1848 
1849   cmap = mat->garray;
1850   if (v  || idx) {
1851     if (nztot) {
1852       /* Sort by increasing column numbers, assuming A and B already sorted */
1853       PetscInt imark = -1;
1854       if (v) {
1855         *v = v_p = mat->rowvalues;
1856         for (i=0; i<nzB; i++) {
1857           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1858           else break;
1859         }
1860         imark = i;
1861         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1862         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1863       }
1864       if (idx) {
1865         *idx = idx_p = mat->rowindices;
1866         if (imark > -1) {
1867           for (i=0; i<imark; i++) {
1868             idx_p[i] = cmap[cworkB[i]];
1869           }
1870         } else {
1871           for (i=0; i<nzB; i++) {
1872             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1873             else break;
1874           }
1875           imark = i;
1876         }
1877         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1878         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1879       }
1880     } else {
1881       if (idx) *idx = 0;
1882       if (v)   *v   = 0;
1883     }
1884   }
1885   *nz  = nztot;
1886   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1887   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
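
/*
   Usage sketch (annotation, not in the original source): the routine above is
   reached through the public interface; only one row may be obtained at a time
   (see the getrowactive check), and r is assumed to be a locally owned global
   row index.

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRow(mat,r,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... examine ncols, cols[], vals[] ...
     ierr = MatRestoreRow(mat,r,&ncols,&cols,&vals);CHKERRQ(ierr);
*/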
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1893 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1894 {
1895   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1896 
1897   PetscFunctionBegin;
1898   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1899   aij->getrowactive = PETSC_FALSE;
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatNorm_MPIAIJ"
1905 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1906 {
1907   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1908   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1909   PetscErrorCode ierr;
1910   PetscInt       i,j,cstart = mat->cmap->rstart;
1911   PetscReal      sum = 0.0;
1912   MatScalar      *v;
1913 
1914   PetscFunctionBegin;
1915   if (aij->size == 1) {
1916     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1917   } else {
1918     if (type == NORM_FROBENIUS) {
1919       v = amat->a;
1920       for (i=0; i<amat->nz; i++) {
1921         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1922       }
1923       v = bmat->a;
1924       for (i=0; i<bmat->nz; i++) {
1925         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1926       }
1927       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1928       *norm = PetscSqrtReal(*norm);
1929       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1930     } else if (type == NORM_1) { /* max column norm */
1931       PetscReal *tmp,*tmp2;
1932       PetscInt  *jj,*garray = aij->garray;
1933       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1934       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1935       *norm = 0.0;
1936       v     = amat->a; jj = amat->j;
1937       for (j=0; j<amat->nz; j++) {
1938         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1939       }
1940       v = bmat->a; jj = bmat->j;
1941       for (j=0; j<bmat->nz; j++) {
1942         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1943       }
1944       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1945       for (j=0; j<mat->cmap->N; j++) {
1946         if (tmp2[j] > *norm) *norm = tmp2[j];
1947       }
1948       ierr = PetscFree(tmp);CHKERRQ(ierr);
1949       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1950       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1951     } else if (type == NORM_INFINITY) { /* max row norm */
1952       PetscReal ntemp = 0.0;
1953       for (j=0; j<aij->A->rmap->n; j++) {
1954         v   = amat->a + amat->i[j];
1955         sum = 0.0;
1956         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1957           sum += PetscAbsScalar(*v); v++;
1958         }
1959         v = bmat->a + bmat->i[j];
1960         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1961           sum += PetscAbsScalar(*v); v++;
1962         }
1963         if (sum > ntemp) ntemp = sum;
1964       }
1965       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1966       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1967     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1968   }
1969   PetscFunctionReturn(0);
1970 }
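
/*
   For reference (annotation), the norms computed above are

     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_{i,j} |a_{ij}|^2)   (local sums of squares, MPI_SUM reduction)
     NORM_1:          ||A||_1   = max_j sum_i |a_{ij}|         (column sums assembled with MPI_SUM, then the max)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_{ij}|         (local row sums, MPI_MAX reduction)
*/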
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatTranspose_MPIAIJ"
1974 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1975 {
1976   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1977   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1978   PetscErrorCode ierr;
1979   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1980   PetscInt       cstart = A->cmap->rstart,ncol;
1981   Mat            B;
1982   MatScalar      *array;
1983 
1984   PetscFunctionBegin;
1985   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1986 
1987   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1988   ai = Aloc->i; aj = Aloc->j;
1989   bi = Bloc->i; bj = Bloc->j;
1990   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1991     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1992     PetscSFNode          *oloc;
1993     PETSC_UNUSED PetscSF sf;
1994 
1995     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1996     /* compute d_nnz for preallocation */
1997     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1998     for (i=0; i<ai[ma]; i++) {
1999       d_nnz[aj[i]]++;
2000       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2001     }
2002     /* compute local off-diagonal contributions */
2003     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2004     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2005     /* map those to global */
2006     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2007     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2008     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2009     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2010     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2011     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2012     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2013 
2014     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2015     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2016     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2017     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2018     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2019     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2020   } else {
2021     B    = *matout;
2022     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2023     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2024   }
2025 
2026   /* copy over the A part */
2027   array = Aloc->a;
2028   row   = A->rmap->rstart;
2029   for (i=0; i<ma; i++) {
2030     ncol = ai[i+1]-ai[i];
2031     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2032     row++;
2033     array += ncol; aj += ncol;
2034   }
2035   aj = Aloc->j;
2036   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2037 
2038   /* copy over the B part */
2039   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2040   array = Bloc->a;
2041   row   = A->rmap->rstart;
2042   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2043   cols_tmp = cols;
2044   for (i=0; i<mb; i++) {
2045     ncol = bi[i+1]-bi[i];
2046     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2047     row++;
2048     array += ncol; cols_tmp += ncol;
2049   }
2050   ierr = PetscFree(cols);CHKERRQ(ierr);
2051 
2052   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2053   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2054   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2055     *matout = B;
2056   } else {
2057     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2058   }
2059   PetscFunctionReturn(0);
2060 }
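
/*
   Usage sketch (annotation): both calling modes handled above, assuming an
   assembled MPIAIJ matrix A.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place, square matrices only
*/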
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2064 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2065 {
2066   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2067   Mat            a    = aij->A,b = aij->B;
2068   PetscErrorCode ierr;
2069   PetscInt       s1,s2,s3;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2073   if (rr) {
2074     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2075     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2076     /* Overlap communication with computation. */
2077     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2078   }
2079   if (ll) {
2080     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2081     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2082     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2083   }
2084   /* scale the diagonal block */
2085   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2086 
2087   if (rr) {
2088     /* Do a scatter end and then right scale the off-diagonal block */
2089     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2090     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2097 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   PetscErrorCode ierr;
2101 
2102   PetscFunctionBegin;
2103   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatEqual_MPIAIJ"
2109 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2110 {
2111   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2112   Mat            a,b,c,d;
2113   PetscBool      flg;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   a = matA->A; b = matA->B;
2118   c = matB->A; d = matB->B;
2119 
2120   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2121   if (flg) {
2122     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2123   }
2124   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatCopy_MPIAIJ"
2130 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2131 {
2132   PetscErrorCode ierr;
2133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2134   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2135 
2136   PetscFunctionBegin;
2137   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2138   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2139     /* Because of the column compression in the off-process part of the matrix a->B,
2140        the number of columns in a->B and b->B may differ, hence we cannot call
2141        MatCopy() directly on the two parts. If need be, a copy more efficient than
2142        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2143        and then copying the submatrices */
2144     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2145   } else {
2146     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2147     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatSetUp_MPIAIJ"
2154 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2155 {
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 /*
2164    Computes the number of nonzeros per row needed for preallocation when X and Y
2165    have different nonzero structure.
2166 */
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2169 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2170 {
2171   PetscInt       i,j,k,nzx,nzy;
2172 
2173   PetscFunctionBegin;
2174   /* Set the number of nonzeros in the new matrix */
2175   for (i=0; i<m; i++) {
2176     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2177     nzx = xi[i+1] - xi[i];
2178     nzy = yi[i+1] - yi[i];
2179     nnz[i] = 0;
2180     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2181       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2182       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2183       nnz[i]++;
2184     }
2185     for (; k<nzy; k++) nnz[i]++;
2186   }
2187   PetscFunctionReturn(0);
2188 }
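
/*
   Worked example (annotation) for one row of the merge count above, with
   global column indices X = {0,3,7} and Y = {3,5}: the duplicate 3 is skipped
   and counted once, the 5 is "caught up" to before the 7 is counted, and every
   X entry contributes, giving nnz[i] = 4 = |{0,3,5,7}|.
*/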
2189 
2190 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2193 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2194 {
2195   PetscErrorCode ierr;
2196   PetscInt       m = Y->rmap->N;
2197   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2198   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2199 
2200   PetscFunctionBegin;
2201   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatAXPY_MPIAIJ"
2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2208 {
2209   PetscErrorCode ierr;
2210   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2211   PetscBLASInt   bnz,one=1;
2212   Mat_SeqAIJ     *x,*y;
2213 
2214   PetscFunctionBegin;
2215   if (str == SAME_NONZERO_PATTERN) {
2216     PetscScalar alpha = a;
2217     x    = (Mat_SeqAIJ*)xx->A->data;
2218     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2219     y    = (Mat_SeqAIJ*)yy->A->data;
2220     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2221     x    = (Mat_SeqAIJ*)xx->B->data;
2222     y    = (Mat_SeqAIJ*)yy->B->data;
2223     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2224     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2225     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2226   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2227     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2228   } else {
2229     Mat      B;
2230     PetscInt *nnz_d,*nnz_o;
2231     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2232     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2233     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2234     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2235     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2236     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2237     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2238     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2239     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2240     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2241     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2242     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2243     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2244     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2245   }
2246   PetscFunctionReturn(0);
2247 }
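
/*
   Usage sketch (annotation): Y <- a*X + Y through the public interface; the
   MatStructure argument selects the fast BLAS path above only when the two
   nonzero patterns are known to be identical.

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/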
2248 
2249 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatConjugate_MPIAIJ"
2253 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2254 {
2255 #if defined(PETSC_USE_COMPLEX)
2256   PetscErrorCode ierr;
2257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2258 
2259   PetscFunctionBegin;
2260   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2261   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2262 #else
2263   PetscFunctionBegin;
2264 #endif
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatRealPart_MPIAIJ"
2270 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2271 {
2272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2273   PetscErrorCode ierr;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2277   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2283 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2290   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2296 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2297 {
2298   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300   PetscInt       i,*idxb = 0;
2301   PetscScalar    *va,*vb;
2302   Vec            vtmp;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2306   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2307   if (idx) {
2308     for (i=0; i<A->rmap->n; i++) {
2309       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2310     }
2311   }
2312 
2313   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2314   if (idx) {
2315     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2316   }
2317   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2318   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2319 
2320   for (i=0; i<A->rmap->n; i++) {
2321     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2322       va[i] = vb[i];
2323       if (idx) idx[i] = a->garray[idxb[i]];
2324     }
2325   }
2326 
2327   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2328   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2329   ierr = PetscFree(idxb);CHKERRQ(ierr);
2330   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2339   PetscErrorCode ierr;
2340   PetscInt       i,*idxb = 0;
2341   PetscScalar    *va,*vb;
2342   Vec            vtmp;
2343 
2344   PetscFunctionBegin;
2345   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2346   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2347   if (idx) {
2348     for (i=0; i<A->rmap->n; i++) {
2349       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2350     }
2351   }
2352 
2353   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2354   if (idx) {
2355     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2356   }
2357   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2358   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2359 
2360   for (i=0; i<A->rmap->n; i++) {
2361     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2362       va[i] = vb[i];
2363       if (idx) idx[i] = a->garray[idxb[i]];
2364     }
2365   }
2366 
2367   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2369   ierr = PetscFree(idxb);CHKERRQ(ierr);
2370   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2376 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2377 {
2378   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2379   PetscInt       n      = A->rmap->n;
2380   PetscInt       cstart = A->cmap->rstart;
2381   PetscInt       *cmap  = mat->garray;
2382   PetscInt       *diagIdx, *offdiagIdx;
2383   Vec            diagV, offdiagV;
2384   PetscScalar    *a, *diagA, *offdiagA;
2385   PetscInt       r;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2391   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2393   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2394   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2395   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2396   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2397   for (r = 0; r < n; ++r) {
2398     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2399       a[r]   = diagA[r];
2400       idx[r] = cstart + diagIdx[r];
2401     } else {
2402       a[r]   = offdiagA[r];
2403       idx[r] = cmap[offdiagIdx[r]];
2404     }
2405   }
2406   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2408   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2409   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2410   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2411   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2458 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2459 {
2460   PetscErrorCode ierr;
2461   Mat            *dummy;
2462 
2463   PetscFunctionBegin;
2464   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2465   *newmat = *dummy;
2466   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2472 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2473 {
2474   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2479   A->errortype = a->A->errortype;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2485 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2486 {
2487   PetscErrorCode ierr;
2488   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2489 
2490   PetscFunctionBegin;
2491   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2492   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2493   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2501 {
2502   PetscFunctionBegin;
2503   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2504   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 #undef __FUNCT__
2509 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2510 /*@
2511    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2512 
2513    Collective on Mat
2514 
2515    Input Parameters:
2516 +    A - the matrix
2517 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2518 
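   Options Database Keys:
.    -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap (see MatSetFromOptions_MPIAIJ() below)
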
2519  Level: advanced
2520 
2521 @*/
2522 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2523 {
2524   PetscErrorCode       ierr;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2533 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2534 {
2535   PetscErrorCode       ierr;
2536   PetscBool            sc = PETSC_FALSE,flg;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2540   ierr = PetscObjectOptionsBegin((PetscObject)A);
2541     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2542     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2543     if (flg) {
2544       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2545     }
2546   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatShift_MPIAIJ"
2552 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2553 {
2554   PetscErrorCode ierr;
2555   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2556   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2557 
2558   PetscFunctionBegin;
2559   if (!Y->preallocated) {
2560     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2561   } else if (!aij->nz) {
2562     PetscInt nonew = aij->nonew;
2563     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2564     aij->nonew = nonew;
2565   }
2566   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 #undef __FUNCT__
2571 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2572 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2573 {
2574   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2579   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2580   if (d) {
2581     PetscInt rstart;
2582     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2583     *d += rstart;
2585   }
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 
2590 /* -------------------------------------------------------------------*/
2591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2592                                        MatGetRow_MPIAIJ,
2593                                        MatRestoreRow_MPIAIJ,
2594                                        MatMult_MPIAIJ,
2595                                 /* 4*/ MatMultAdd_MPIAIJ,
2596                                        MatMultTranspose_MPIAIJ,
2597                                        MatMultTransposeAdd_MPIAIJ,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*10*/ 0,
2602                                        0,
2603                                        0,
2604                                        MatSOR_MPIAIJ,
2605                                        MatTranspose_MPIAIJ,
2606                                 /*15*/ MatGetInfo_MPIAIJ,
2607                                        MatEqual_MPIAIJ,
2608                                        MatGetDiagonal_MPIAIJ,
2609                                        MatDiagonalScale_MPIAIJ,
2610                                        MatNorm_MPIAIJ,
2611                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2612                                        MatAssemblyEnd_MPIAIJ,
2613                                        MatSetOption_MPIAIJ,
2614                                        MatZeroEntries_MPIAIJ,
2615                                 /*24*/ MatZeroRows_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*29*/ MatSetUp_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*34*/ MatDuplicate_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*39*/ MatAXPY_MPIAIJ,
2631                                        MatGetSubMatrices_MPIAIJ,
2632                                        MatIncreaseOverlap_MPIAIJ,
2633                                        MatGetValues_MPIAIJ,
2634                                        MatCopy_MPIAIJ,
2635                                 /*44*/ MatGetRowMax_MPIAIJ,
2636                                        MatScale_MPIAIJ,
2637                                        MatShift_MPIAIJ,
2638                                        MatDiagonalSet_MPIAIJ,
2639                                        MatZeroRowsColumns_MPIAIJ,
2640                                 /*49*/ MatSetRandom_MPIAIJ,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2646                                        0,
2647                                        MatSetUnfactored_MPIAIJ,
2648                                        MatPermute_MPIAIJ,
2649                                        0,
2650                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2651                                        MatDestroy_MPIAIJ,
2652                                        MatView_MPIAIJ,
2653                                        0,
2654                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2655                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2656                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2661                                        MatGetRowMinAbs_MPIAIJ,
2662                                        0,
2663                                        MatSetColoring_MPIAIJ,
2664                                        0,
2665                                        MatSetValuesAdifor_MPIAIJ,
2666                                 /*75*/ MatFDColoringApply_AIJ,
2667                                        MatSetFromOptions_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        MatFindZeroDiagonals_MPIAIJ,
2671                                 /*80*/ 0,
2672                                        0,
2673                                        0,
2674                                 /*83*/ MatLoad_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2681                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2682                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2683                                        MatPtAP_MPIAIJ_MPIAIJ,
2684                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2685                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                 /*99*/ 0,
2691                                        0,
2692                                        0,
2693                                        MatConjugate_MPIAIJ,
2694                                        0,
2695                                 /*104*/MatSetValuesRow_MPIAIJ,
2696                                        MatRealPart_MPIAIJ,
2697                                        MatImaginaryPart_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                 /*109*/0,
2701                                        0,
2702                                        MatGetRowMin_MPIAIJ,
2703                                        0,
2704                                        MatMissingDiagonal_MPIAIJ,
2705                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2706                                        0,
2707                                        MatGetGhosts_MPIAIJ,
2708                                        0,
2709                                        0,
2710                                 /*119*/0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        MatGetMultiProcBlock_MPIAIJ,
2715                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2716                                        MatGetColumnNorms_MPIAIJ,
2717                                        MatInvertBlockDiagonal_MPIAIJ,
2718                                        0,
2719                                        MatGetSubMatricesMPI_MPIAIJ,
2720                                 /*129*/0,
2721                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2722                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2723                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2724                                        0,
2725                                 /*134*/0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                 /*139*/0,
2731                                        0,
2732                                        0,
2733                                        MatFDColoringSetUp_MPIXAIJ,
2734                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2735                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2736 };
2737 
2738 /* ----------------------------------------------------------------------------------------*/
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2742 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2743 {
2744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2749   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2755 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2756 {
2757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2762   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2763   PetscFunctionReturn(0);
2764 }
2765 
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2768 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2769 {
2770   Mat_MPIAIJ     *b;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2775   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2776   b = (Mat_MPIAIJ*)B->data;
2777 
2778   if (!B->preallocated) {
2779     /* Explicitly create 2 MATSEQAIJ matrices. */
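    /* b->A is the "diagonal" block (local rows by local columns); b->B is the "off-diagonal"
       block, created with the full global column width; during assembly its column space is
       compacted to just the referenced columns, recorded in garray */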
2780     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2781     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2782     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2783     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2787     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2788     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2789     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2790   }
2791 
2792   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2793   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2794   B->preallocated = PETSC_TRUE;
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2801 {
2802   Mat            mat;
2803   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   *newmat = 0;
2808   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2809   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2810   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2811   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2812   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2813   a       = (Mat_MPIAIJ*)mat->data;
2814 
2815   mat->factortype   = matin->factortype;
2816   mat->assembled    = PETSC_TRUE;
2817   mat->insertmode   = NOT_SET_VALUES;
2818   mat->preallocated = PETSC_TRUE;
2819 
2820   a->size         = oldmat->size;
2821   a->rank         = oldmat->rank;
2822   a->donotstash   = oldmat->donotstash;
2823   a->roworiented  = oldmat->roworiented;
2824   a->rowindices   = 0;
2825   a->rowvalues    = 0;
2826   a->getrowactive = PETSC_FALSE;
2827 
2828   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2830 
2831   if (oldmat->colmap) {
2832 #if defined(PETSC_USE_CTABLE)
2833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2834 #else
2835     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2838 #endif
2839   } else a->colmap = 0;
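  /* garray[] maps local column indices of the off-diagonal block B to global column numbers */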
2840   if (oldmat->garray) {
2841     PetscInt len;
2842     len  = oldmat->B->cmap->n;
2843     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2846   } else a->garray = 0;
2847 
2848   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2849   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2850   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2851   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2852   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2853   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2854   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2855   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2856   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2857   *newmat = mat;
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatLoad_MPIAIJ"
2865 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2866 {
2867   PetscScalar    *vals,*svals;
2868   MPI_Comm       comm;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2871   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2872   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2873   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2874   PetscInt       cend,cstart,n,*rowners;
2875   int            fd;
2876   PetscInt       bs = newMat->rmap->bs;
2877 
2878   PetscFunctionBegin;
2879   /* force binary viewer to load .info file if it has not yet done so */
2880   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2881   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2883   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2884   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2885   if (!rank) {
2886     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2887     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2888   }
2889 
2890   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2891   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2892   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2893   if (bs < 0) bs = 1;
2894 
2895   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2896   M    = header[1]; N = header[2];
2897 
2898   /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newMat->rmap->N);
  if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newMat->cmap->N);
2901 
2902   /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2904   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2905   else m = newMat->rmap->n; /* Set by user */
2906 
2907   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2908   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2909 
2910   /* First process needs enough room for process with most rows */
2911   if (!rank) {
2912     mmax = rowners[1];
2913     for (i=2; i<=size; i++) {
2914       mmax = PetscMax(mmax, rowners[i]);
2915     }
2916   } else mmax = -1;             /* unused, but compilers complain */
2917 
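  /* turn the gathered per-process row counts into a prefix sum so that
     rowners[rank] .. rowners[rank+1] gives each process's row ownership range */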
2918   rowners[0] = 0;
2919   for (i=2; i<=size; i++) {
2920     rowners[i] += rowners[i-1];
2921   }
2922   rstart = rowners[rank];
2923   rend   = rowners[rank+1];
2924 
2925   /* distribute row lengths to all processors */
2926   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2927   if (!rank) {
2928     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2929     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2930     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2931     for (j=0; j<m; j++) {
2932       procsnz[0] += ourlens[j];
2933     }
2934     for (i=1; i<size; i++) {
2935       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2936       /* calculate the number of nonzeros on each processor */
2937       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2938         procsnz[i] += rowlengths[j];
2939       }
2940       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2941     }
2942     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2943   } else {
2944     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2945   }
2946 
2947   if (!rank) {
2948     /* determine max buffer needed and allocate it */
2949     maxnz = 0;
2950     for (i=0; i<size; i++) {
2951       maxnz = PetscMax(maxnz,procsnz[i]);
2952     }
2953     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2954 
2955     /* read in my part of the matrix column indices  */
2956     nz   = procsnz[0];
2957     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2958     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2959 
    /* read in everyone else's parts and ship them off */
2961     for (i=1; i<size; i++) {
2962       nz   = procsnz[i];
2963       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2964       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2965     }
2966     ierr = PetscFree(cols);CHKERRQ(ierr);
2967   } else {
2968     /* determine buffer space needed for message */
2969     nz = 0;
2970     for (i=0; i<m; i++) {
2971       nz += ourlens[i];
2972     }
2973     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2974 
    /* receive message of column indices */
2976     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2977   }
2978 
2979   /* determine column ownership if matrix is not square */
2980   if (N != M) {
2981     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2982     else n = newMat->cmap->n;
2983     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2984     cstart = cend - n;
2985   } else {
2986     cstart = rstart;
2987     cend   = rend;
2988     n      = cend - cstart;
2989   }
2990 
2991   /* loop over local rows, determining number of off diagonal entries */
2992   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2993   jj   = 0;
2994   for (i=0; i<m; i++) {
2995     for (j=0; j<ourlens[i]; j++) {
2996       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2997       jj++;
2998     }
2999   }
3000 
3001   for (i=0; i<m; i++) {
3002     ourlens[i] -= offlens[i];
3003   }
3004   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3005 
3006   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3007 
3008   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3009 
3010   for (i=0; i<m; i++) {
3011     ourlens[i] += offlens[i];
3012   }
3013 
3014   if (!rank) {
3015     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3016 
3017     /* read in my part of the matrix numerical values  */
3018     nz   = procsnz[0];
3019     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3020 
3021     /* insert into matrix */
3022     jj      = rstart;
3023     smycols = mycols;
3024     svals   = vals;
3025     for (i=0; i<m; i++) {
3026       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3027       smycols += ourlens[i];
3028       svals   += ourlens[i];
3029       jj++;
3030     }
3031 
3032     /* read in other processors and ship out */
3033     for (i=1; i<size; i++) {
3034       nz   = procsnz[i];
3035       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3036       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3037     }
3038     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3039   } else {
3040     /* receive numeric values */
3041     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3042 
    /* receive message of values */
3044     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3045 
3046     /* insert into matrix */
3047     jj      = rstart;
3048     smycols = mycols;
3049     svals   = vals;
3050     for (i=0; i<m; i++) {
3051       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3052       smycols += ourlens[i];
3053       svals   += ourlens[i];
3054       jj++;
3055     }
3056   }
3057   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3058   ierr = PetscFree(vals);CHKERRQ(ierr);
3059   ierr = PetscFree(mycols);CHKERRQ(ierr);
3060   ierr = PetscFree(rowners);CHKERRQ(ierr);
3061   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3068 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3069 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3070 {
3071   PetscErrorCode ierr;
3072   IS             iscol_local;
3073   PetscInt       csize;
3074 
3075   PetscFunctionBegin;
3076   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3077   if (call == MAT_REUSE_MATRIX) {
3078     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3079     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3080   } else {
    /* check if we are grabbing all columns */
3082     PetscBool    isstride;
3083     PetscMPIInt  lisstride = 0,gisstride;
3084     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3085     if (isstride) {
3086       PetscInt  start,len,mstart,mlen;
3087       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3088       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3089       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3090       if (mstart == start && mlen-mstart == len) lisstride = 1;
3091     }
3092     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3093     if (gisstride) {
3094       PetscInt N;
3095       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3096       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3097       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3098       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3099     } else {
3100       PetscInt cbs;
3101       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3102       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3103       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3104     }
3105   }
3106   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3107   if (call == MAT_INITIAL_MATRIX) {
3108     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3109     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3110   }
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3117 /*
    Not great since it makes two copies of the submatrix: first a SeqAIJ holding the
  local rows, and then the end result obtained by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3121 
3122   Note: This requires a sequential iscol with all indices.
3123 */
3124 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3125 {
3126   PetscErrorCode ierr;
3127   PetscMPIInt    rank,size;
3128   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3129   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3130   PetscBool      allcolumns, colflag;
3131   Mat            M,Mreuse;
3132   MatScalar      *vwork,*aa;
3133   MPI_Comm       comm;
3134   Mat_SeqAIJ     *aij;
3135 
3136   PetscFunctionBegin;
3137   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3138   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3139   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3140 
3141   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3142   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3143   if (colflag && ncol == mat->cmap->N) {
3144     allcolumns = PETSC_TRUE;
3145     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3146   } else {
3147     allcolumns = PETSC_FALSE;
3148   }
3149   if (call ==  MAT_REUSE_MATRIX) {
3150     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3151     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3152     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3153   } else {
3154     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3155   }
3156 
3157   /*
3158       m - number of local rows
3159       n - number of columns (same on all processors)
3160       rstart - first row in new global matrix generated
3161   */
3162   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3163   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3164   if (call == MAT_INITIAL_MATRIX) {
3165     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3166     ii  = aij->i;
3167     jj  = aij->j;
3168 
3169     /*
3170         Determine the number of non-zeros in the diagonal and off-diagonal
3171         portions of the matrix in order to do correct preallocation
3172     */
3173 
3174     /* first get start and end of "diagonal" columns */
3175     if (csize == PETSC_DECIDE) {
3176       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3177       if (mglobal == n) { /* square matrix */
3178         nlocal = m;
3179       } else {
3180         nlocal = n/size + ((n % size) > rank);
3181       }
3182     } else {
3183       nlocal = csize;
3184     }
3185     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3186     rstart = rend - nlocal;
3187     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3188 
3189     /* next, compute all the lengths */
3190     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3191     olens = dlens + m;
3192     for (i=0; i<m; i++) {
3193       jend = ii[i+1] - ii[i];
3194       olen = 0;
3195       dlen = 0;
3196       for (j=0; j<jend; j++) {
3197         if (*jj < rstart || *jj >= rend) olen++;
3198         else dlen++;
3199         jj++;
3200       }
3201       olens[i] = olen;
3202       dlens[i] = dlen;
3203     }
3204     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3205     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3206     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3207     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3208     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3209     ierr = PetscFree(dlens);CHKERRQ(ierr);
3210   } else {
3211     PetscInt ml,nl;
3212 
3213     M    = *newmat;
3214     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3215     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3216     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3217     /*
3218          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3219        rather than the slower MatSetValues().
3220     */
3221     M->was_assembled = PETSC_TRUE;
3222     M->assembled     = PETSC_FALSE;
3223   }
3224   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3225   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3226   ii   = aij->i;
3227   jj   = aij->j;
3228   aa   = aij->a;
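  /* walk the CSR arrays of the local submatrix Mreuse and insert each row into the distributed result */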
3229   for (i=0; i<m; i++) {
3230     row   = rstart + i;
3231     nz    = ii[i+1] - ii[i];
3232     cwork = jj;     jj += nz;
3233     vwork = aa;     aa += nz;
3234     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3235   }
3236 
3237   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3239   *newmat = M;
3240 
  /* save the local submatrix for a later MAT_REUSE_MATRIX request */
3242   if (call ==  MAT_INITIAL_MATRIX) {
3243     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3244     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3245   }
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3251 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3252 {
3253   PetscInt       m,cstart, cend,j,nnz,i,d;
3254   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3255   const PetscInt *JJ;
3256   PetscScalar    *values;
3257   PetscErrorCode ierr;
3258 
3259   PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3261 
3262   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3263   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3264   m      = B->rmap->n;
3265   cstart = B->cmap->rstart;
3266   cend   = B->cmap->rend;
3267   rstart = B->rmap->rstart;
3268 
3269   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3270 
#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3278   }
3279 #endif
3280 
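  /* count, for each local row, how many column indices fall inside the diagonal block
     [cstart,cend) and how many fall outside it, to drive the preallocation below */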
3281   for (i=0; i<m; i++) {
3282     nnz     = Ii[i+1]- Ii[i];
3283     JJ      = J + Ii[i];
3284     nnz_max = PetscMax(nnz_max,nnz);
3285     d       = 0;
3286     for (j=0; j<nnz; j++) {
3287       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3288     }
3289     d_nnz[i] = d;
3290     o_nnz[i] = nnz - d;
3291   }
3292   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3293   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3294 
3295   if (v) values = (PetscScalar*)v;
3296   else {
3297     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3298   }
3299 
3300   for (i=0; i<m; i++) {
3301     ii   = i + rstart;
3302     nnz  = Ii[i+1]- Ii[i];
3303     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3304   }
3305   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3306   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3307 
3308   if (!v) {
3309     ierr = PetscFree(values);CHKERRQ(ierr);
3310   }
3311   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3317 /*@
3318    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3319    (the default parallel PETSc format).
3320 
3321    Collective on MPI_Comm
3322 
3323    Input Parameters:
3324 +  B - the matrix
3325 .  i - the indices into j for the start of each local row (starts with zero)
3326 .  j - the column indices for each local row (starts with zero)
3327 -  v - optional values in the matrix
3328 
3329    Level: developer
3330 
3331    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown
3341 
3342 $        1 0 0
3343 $        2 0 3     P0
3344 $       -------
3345 $        4 5 6     P1
3346 $
3347 $     Process0 [P0]: rows_owned=[0,1]
3348 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3349 $        j =  {0,0,2}  [size = 3]
3350 $        v =  {1,2,3}  [size = 3]
3351 $
3352 $     Process1 [P1]: rows_owned=[2]
3353 $        i =  {0,3}    [size = nrow+1  = 1+1]
3354 $        j =  {0,1,2}  [size = 3]
3355 $        v =  {4,5,6}  [size = 3]
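
   For P0 above, the corresponding call is simply (a sketch; B is the matrix being
   preallocated, with 2 local rows already set):

$     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);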
3356 
3357 .keywords: matrix, aij, compressed row, sparse, parallel
3358 
3359 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3360           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3361 @*/
3362 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3363 {
3364   PetscErrorCode ierr;
3365 
3366   PetscFunctionBegin;
3367   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3368   PetscFunctionReturn(0);
3369 }
3370 
3371 #undef __FUNCT__
3372 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3373 /*@C
3374    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3375    (the default parallel PETSc format).  For good matrix assembly performance
3376    the user should preallocate the matrix storage by setting the parameters
3377    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3378    performance can be increased by more than a factor of 50.
3379 
3380    Collective on MPI_Comm
3381 
3382    Input Parameters:
3383 +  B - the matrix
3384 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3385            (same value is used for all local rows)
3386 .  d_nnz - array containing the number of nonzeros in the various rows of the
3387            DIAGONAL portion of the local submatrix (possibly different for each row)
3388            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3389            The size of this array is equal to the number of local rows, i.e 'm'.
3390            For matrices that will be factored, you must leave room for (and set)
3391            the diagonal entry even if it is zero.
3392 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3393            submatrix (same value is used for all local rows).
3394 -  o_nnz - array containing the number of nonzeros in the various rows of the
3395            OFF-DIAGONAL portion of the local submatrix (possibly different for
3396            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3397            structure. The size of this array is equal to the number
3398            of local rows, i.e 'm'.
3399 
3400    If the *_nnz parameter is given then the *_nz parameter is ignored
3401 
3402    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
3404    storage.  The stored row and column indices begin with zero.
3405    See Users-Manual: ch_mat for details.
3406 
3407    The parallel matrix is partitioned such that the first m0 rows belong to
3408    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3410 
3411    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3420 
3421    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3422 
3423    You can call MatGetInfo() to get information on how effective the preallocation was;
3424    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3425    You can also run with the option -info and look for messages with the string
3426    malloc in them to see if additional memory allocation was needed.
3427 
3428    Example usage:
3429 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3432    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3433    as follows:
3434 
3435 .vb
3436             1  2  0  |  0  3  0  |  0  4
3437     Proc0   0  5  6  |  7  0  0  |  8  0
3438             9  0 10  | 11  0  0  | 12  0
3439     -------------------------------------
3440            13  0 14  | 15 16 17  |  0  0
3441     Proc1   0 18  0  | 19 20 21  |  0  0
3442             0  0  0  | 22 23  0  | 24  0
3443     -------------------------------------
3444     Proc2  25 26 27  |  0  0 28  | 29  0
3445            30  0  0  | 31 32 33  |  0 34
3446 .ve
3447 
3448    This can be represented as a collection of submatrices as:
3449 
3450 .vb
3451       A B C
3452       D E F
3453       G H I
3454 .ve
3455 
3456    Where the submatrices A,B,C are owned by proc0, D,E,F are
3457    owned by proc1, G,H,I are owned by proc2.
3458 
3459    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3460    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3461    The 'M','N' parameters are 8,8, and have the same values on all procs.
3462 
3463    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3464    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3465    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3466    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3469 
3470    When d_nz, o_nz parameters are specified, d_nz storage elements are
3471    allocated for every row of the local diagonal submatrix, and o_nz
3472    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3475    In this case, the values of d_nz,o_nz are:
3476 .vb
3477      proc0 : dnz = 2, o_nz = 2
3478      proc1 : dnz = 3, o_nz = 2
3479      proc2 : dnz = 1, o_nz = 4
3480 .ve
3481    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3482    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3484    34 values.
3485 
3486    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3488    In the above case the values for d_nnz,o_nnz are:
3489 .vb
3490      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3491      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3492      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3493 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
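
   For instance, proc1 above could preallocate with the sketch below (assuming B already
   has its local size set to 3 rows; error checking as usual):

$     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
$     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);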
3496 
3497    Level: intermediate
3498 
3499 .keywords: matrix, aij, compressed row, sparse, parallel
3500 
3501 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3502           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3503 @*/
3504 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3505 {
3506   PetscErrorCode ierr;
3507 
3508   PetscFunctionBegin;
3509   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3510   PetscValidType(B,1);
3511   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3512   PetscFunctionReturn(0);
3513 }
3514 
3515 #undef __FUNCT__
3516 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3517 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.
3520 
3521    Collective on MPI_Comm
3522 
3523    Input Parameters:
3524 +  comm - MPI communicator
3525 .  m - number of local rows (Cannot be PETSC_DECIDE)
3526 .  n - This value should be the same as the local size used in creating the
3527        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3528        calculated if N is given) For square matrices n is almost always m.
3529 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3530 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3531 .   i - row indices
3532 .   j - column indices
3533 -   a - matrix values
3534 
3535    Output Parameter:
3536 .   mat - the matrix
3537 
3538    Level: intermediate
3539 
3540    Notes:
3541        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3542      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3543      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3544 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown
3550 
3551 $        1 0 0
3552 $        2 0 3     P0
3553 $       -------
3554 $        4 5 6     P1
3555 $
3556 $     Process0 [P0]: rows_owned=[0,1]
3557 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3558 $        j =  {0,0,2}  [size = 3]
3559 $        v =  {1,2,3}  [size = 3]
3560 $
3561 $     Process1 [P1]: rows_owned=[2]
3562 $        i =  {0,3}    [size = nrow+1  = 1+1]
3563 $        j =  {0,1,2}  [size = 3]
3564 $        v =  {4,5,6}  [size = 3]
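
   For P0 above, the corresponding creation call would be (a sketch; each process passes
   its own i, j, and v arrays for its 2 or 1 local rows of the 3x3 matrix):

$     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&mat);CHKERRQ(ierr);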
3565 
3566 .keywords: matrix, aij, compressed row, sparse, parallel
3567 
3568 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3569           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3570 @*/
3571 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3572 {
3573   PetscErrorCode ierr;
3574 
3575   PetscFunctionBegin;
3576   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3577   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3578   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3579   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3580   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3581   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3582   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3583   PetscFunctionReturn(0);
3584 }
3585 
3586 #undef __FUNCT__
3587 #define __FUNCT__ "MatCreateAIJ"
3588 /*@C
3589    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3590    (the default parallel PETSc format).  For good matrix assembly performance
3591    the user should preallocate the matrix storage by setting the parameters
3592    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3593    performance can be increased by more than a factor of 50.
3594 
3595    Collective on MPI_Comm
3596 
3597    Input Parameters:
3598 +  comm - MPI communicator
3599 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3600            This value should be the same as the local size used in creating the
3601            y vector for the matrix-vector product y = Ax.
3602 .  n - This value should be the same as the local size used in creating the
3603        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3604        calculated if N is given) For square matrices n is almost always m.
3605 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3606 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3607 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3608            (same value is used for all local rows)
3609 .  d_nnz - array containing the number of nonzeros in the various rows of the
3610            DIAGONAL portion of the local submatrix (possibly different for each row)
3611            or NULL, if d_nz is used to specify the nonzero structure.
3612            The size of this array is equal to the number of local rows, i.e 'm'.
3613 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3614            submatrix (same value is used for all local rows).
3615 -  o_nnz - array containing the number of nonzeros in the various rows of the
3616            OFF-DIAGONAL portion of the local submatrix (possibly different for
3617            each row) or NULL, if o_nz is used to specify the nonzero
3618            structure. The size of this array is equal to the number
3619            of local rows, i.e 'm'.
3620 
3621    Output Parameter:
3622 .  A - the matrix
3623 
3624    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3626    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3627 
3628    Notes:
3629    If the *_nnz parameter is given then the *_nz parameter is ignored
3630 
3631    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3632    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3633    storage requirements for this matrix.
3634 
3635    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3636    processor than it must be used on all processors that share the object for
3637    that argument.
3638 
3639    The user MUST specify either the local or global matrix dimensions
3640    (possibly both).
3641 
3642    The parallel matrix is partitioned across processors such that the
3643    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
3647 
3648    The columns are logically partitioned with the n0 columns belonging
3649    to 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3651 
3652    The DIAGONAL portion of the local submatrix on any given processor
3653    is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e. the diagonal matrix on
3655    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3656    etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
3658    illustrates this concept.
3659 
3660    For a square global matrix we define each processor's diagonal portion
3661    to be its local rows and the corresponding columns (a square submatrix);
3662    each processor's off-diagonal portion encompasses the remainder of the
3663    local matrix (a rectangular submatrix).
3664 
3665    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3666 
3667    When calling this routine with a single process communicator, a matrix of
3668    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3669    type of communicator, use the construction mechanism:
3670      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3671 
3672    By default, this format uses inodes (identical nodes) when possible.
3673    We search for consecutive rows with the same nonzero structure, thereby
3674    reusing matrix information to achieve increased efficiency.
3675 
3676    Options Database Keys:
3677 +  -mat_no_inode  - Do not use inodes
3678 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3679 -  -mat_aij_oneindex - Internally use indexing starting at 1
3680         rather than 0.  Note that when calling MatSetValues(),
3681         the user still MUST index entries starting at 0!
3682 
3683 
3684    Example usage:
3685 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3688    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3689    as follows:
3690 
3691 .vb
3692             1  2  0  |  0  3  0  |  0  4
3693     Proc0   0  5  6  |  7  0  0  |  8  0
3694             9  0 10  | 11  0  0  | 12  0
3695     -------------------------------------
3696            13  0 14  | 15 16 17  |  0  0
3697     Proc1   0 18  0  | 19 20 21  |  0  0
3698             0  0  0  | 22 23  0  | 24  0
3699     -------------------------------------
3700     Proc2  25 26 27  |  0  0 28  | 29  0
3701            30  0  0  | 31 32 33  |  0 34
3702 .ve
3703 
3704    This can be represented as a collection of submatrices as:
3705 
3706 .vb
3707       A B C
3708       D E F
3709       G H I
3710 .ve
3711 
3712    Where the submatrices A,B,C are owned by proc0, D,E,F are
3713    owned by proc1, G,H,I are owned by proc2.
3714 
3715    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3716    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3717    The 'M','N' parameters are 8,8, and have the same values on all procs.
3718 
3719    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3720    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3721    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3722    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3725 
3726    When d_nz, o_nz parameters are specified, d_nz storage elements are
3727    allocated for every row of the local diagonal submatrix, and o_nz
3728    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3731    In this case, the values of d_nz,o_nz are:
3732 .vb
3733      proc0 : dnz = 2, o_nz = 2
3734      proc1 : dnz = 3, o_nz = 2
3735      proc2 : dnz = 1, o_nz = 4
3736 .ve
3737    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3738    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3740    34 values.
3741 
3742    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3744    In the above case the values for d_nnz,o_nnz are:
3745 .vb
3746      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3747      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3748      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3749 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
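
   For example, proc0 above would create the matrix with (a sketch; each process passes
   its own d_nnz and o_nnz arrays, and error checking follows the usual conventions):

$     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
$     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);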
3752 
3753    Level: intermediate
3754 
3755 .keywords: matrix, aij, compressed row, sparse, parallel
3756 
3757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3758           MPIAIJ, MatCreateMPIAIJWithArrays()
3759 @*/
3760 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3761 {
3762   PetscErrorCode ierr;
3763   PetscMPIInt    size;
3764 
3765   PetscFunctionBegin;
3766   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3767   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3768   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3769   if (size > 1) {
3770     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3771     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3772   } else {
3773     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3774     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3775   }
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 #undef __FUNCT__
3780 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3781 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3782 {
3783   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3784   PetscBool      flg;
3785   PetscErrorCode ierr;
3786 
3787   PetscFunctionBegin;
3788   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3789   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
3790   if (Ad)     *Ad     = a->A;
3791   if (Ao)     *Ao     = a->B;
3792   if (colmap) *colmap = a->garray;
3793   PetscFunctionReturn(0);
3794 }
3795 
3796 #undef __FUNCT__
3797 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3798 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3799 {
3800   PetscErrorCode ierr;
3801   PetscInt       i;
3802   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3803 
3804   PetscFunctionBegin;
3805   if (coloring->ctype == IS_COLORING_GLOBAL) {
3806     ISColoringValue *allcolors,*colors;
3807     ISColoring      ocoloring;
3808 
3809     /* set coloring for diagonal portion */
3810     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3811 
3812     /* set coloring for off-diagonal portion */
3813     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3814     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3815     for (i=0; i<a->B->cmap->n; i++) {
3816       colors[i] = allcolors[a->garray[i]];
3817     }
3818     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3819     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3820     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3821     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3822   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3823     ISColoringValue *colors;
3824     PetscInt        *larray;
3825     ISColoring      ocoloring;
3826 
3827     /* set coloring for diagonal portion */
3828     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3829     for (i=0; i<a->A->cmap->n; i++) {
3830       larray[i] = i + A->cmap->rstart;
3831     }
3832     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3833     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3834     for (i=0; i<a->A->cmap->n; i++) {
3835       colors[i] = coloring->colors[larray[i]];
3836     }
3837     ierr = PetscFree(larray);CHKERRQ(ierr);
3838     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3839     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3840     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3841 
3842     /* set coloring for off-diagonal portion */
3843     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3844     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3845     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3846     for (i=0; i<a->B->cmap->n; i++) {
3847       colors[i] = coloring->colors[larray[i]];
3848     }
3849     ierr = PetscFree(larray);CHKERRQ(ierr);
3850     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3851     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3852     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3853   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3859 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3860 {
3861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3862   PetscErrorCode ierr;
3863 
3864   PetscFunctionBegin;
3865   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3866   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3867   PetscFunctionReturn(0);
3868 }
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3872 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3873 {
3874   PetscErrorCode ierr;
3875   PetscInt       m,N,i,rstart,nnz,Ii;
3876   PetscInt       *indx;
3877   PetscScalar    *values;
3878 
3879   PetscFunctionBegin;
3880   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3881   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3882     PetscInt       *dnz,*onz,sum,bs,cbs;
3883 
3884     if (n == PETSC_DECIDE) {
3885       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3886     }
3887     /* Check sum(n) = N */
3888     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3889     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3890 
3891     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3892     rstart -= m;
3893 
3894     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3895     for (i=0; i<m; i++) {
3896       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3897       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3898       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3899     }
3900 
3901     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3902     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3903     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3904     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3905     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3906     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3907     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3908   }
3909 
3910   /* numeric phase */
3911   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3912   for (i=0; i<m; i++) {
3913     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3914     Ii   = i + rstart;
3915     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3916     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3917   }
3918   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatFileSplit"
3925 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3926 {
3927   PetscErrorCode    ierr;
3928   PetscMPIInt       rank;
3929   PetscInt          m,N,i,rstart,nnz;
3930   size_t            len;
3931   const PetscInt    *indx;
3932   PetscViewer       out;
3933   char              *name;
3934   Mat               B;
3935   const PetscScalar *values;
3936 
3937   PetscFunctionBegin;
3938   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3939   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3940   /* Should this be the type of the diagonal block of A? */
3941   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3942   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3943   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3944   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3945   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3946   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3947   for (i=0; i<m; i++) {
3948     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3949     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3950     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3951   }
3952   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954 
3955   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3956   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3957   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3958   sprintf(name,"%s.%d",outfile,rank);
3959   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3960   ierr = PetscFree(name);CHKERRQ(ierr);
3961   ierr = MatView(B,out);CHKERRQ(ierr);
3962   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3963   ierr = MatDestroy(&B);CHKERRQ(ierr);
3964   PetscFunctionReturn(0);
3965 }
3966 
3967 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3968 #undef __FUNCT__
3969 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3970 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3971 {
3972   PetscErrorCode      ierr;
3973   Mat_Merge_SeqsToMPI *merge;
3974   PetscContainer      container;
3975 
3976   PetscFunctionBegin;
3977   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3978   if (container) {
3979     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3980     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3981     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3982     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3983     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3984     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3992     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3993     ierr = PetscFree(merge);CHKERRQ(ierr);
3994     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3995   }
3996   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3997   PetscFunctionReturn(0);
3998 }
3999 
4000 #include <../src/mat/utils/freespace.h>
4001 #include <petscbt.h>
4002 
4003 #undef __FUNCT__
4004 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4005 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4006 {
4007   PetscErrorCode      ierr;
4008   MPI_Comm            comm;
4009   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4010   PetscMPIInt         size,rank,taga,*len_s;
4011   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4012   PetscInt            proc,m;
4013   PetscInt            **buf_ri,**buf_rj;
4014   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4015   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4016   MPI_Request         *s_waits,*r_waits;
4017   MPI_Status          *status;
4018   MatScalar           *aa=a->a;
4019   MatScalar           **abuf_r,*ba_i;
4020   Mat_Merge_SeqsToMPI *merge;
4021   PetscContainer      container;
4022 
4023   PetscFunctionBegin;
4024   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4025   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4026 
4027   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4028   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4029 
4030   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4031   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4032 
4033   bi     = merge->bi;
4034   bj     = merge->bj;
4035   buf_ri = merge->buf_ri;
4036   buf_rj = merge->buf_rj;
4037 
4038   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4039   owners = merge->rowmap->range;
4040   len_s  = merge->len_s;
4041 
4042   /* send and recv matrix values */
4043   /*-----------------------------*/
4044   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4045   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4046 
4047   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4048   for (proc=0,k=0; proc<size; proc++) {
4049     if (!len_s[proc]) continue;
4050     i    = owners[proc];
4051     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4052     k++;
4053   }
4054 
4055   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4056   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4057   ierr = PetscFree(status);CHKERRQ(ierr);
4058 
4059   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4060   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4061 
4062   /* insert mat values of mpimat */
4063   /*----------------------------*/
4064   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4065   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4066 
4067   for (k=0; k<merge->nrecv; k++) {
4068     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4069     nrows       = *(buf_ri_k[k]);
4070     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4071     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4072   }
4073 
4074   /* set values of ba */
4075   m = merge->rowmap->n;
4076   for (i=0; i<m; i++) {
4077     arow = owners[rank] + i;
4078     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4079     bnzi = bi[i+1] - bi[i];
4080     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4081 
4082     /* add local non-zero vals of this proc's seqmat into ba */
4083     anzi   = ai[arow+1] - ai[arow];
4084     aj     = a->j + ai[arow];
4085     aa     = a->a + ai[arow];
4086     nextaj = 0;
4087     for (j=0; nextaj<anzi; j++) {
4088       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4089         ba_i[j] += aa[nextaj++];
4090       }
4091     }
4092 
4093     /* add received vals into ba */
4094     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4095       /* i-th row */
4096       if (i == *nextrow[k]) {
4097         anzi   = *(nextai[k]+1) - *nextai[k];
4098         aj     = buf_rj[k] + *(nextai[k]);
4099         aa     = abuf_r[k] + *(nextai[k]);
4100         nextaj = 0;
4101         for (j=0; nextaj<anzi; j++) {
4102           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4103             ba_i[j] += aa[nextaj++];
4104           }
4105         }
4106         nextrow[k]++; nextai[k]++;
4107       }
4108     }
4109     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4110   }
4111   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4112   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4113 
4114   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4115   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4116   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4117   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4118   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4119   PetscFunctionReturn(0);
4120 }
4121 
4122 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4123 
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4126 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4127 {
4128   PetscErrorCode      ierr;
4129   Mat                 B_mpi;
4130   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4131   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4132   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4133   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4134   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4135   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4136   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4137   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4138   MPI_Status          *status;
4139   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4140   PetscBT             lnkbt;
4141   Mat_Merge_SeqsToMPI *merge;
4142   PetscContainer      container;
4143 
4144   PetscFunctionBegin;
4145   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4146 
4147   /* make sure it is a PETSc comm */
4148   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4149   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4150   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4151 
4152   ierr = PetscNew(&merge);CHKERRQ(ierr);
4153   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4154 
4155   /* determine row ownership */
4156   /*---------------------------------------------------------*/
4157   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4158   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4159   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4160   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4161   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4162   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4163   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4164 
4165   m      = merge->rowmap->n;
4166   owners = merge->rowmap->range;
4167 
4168   /* determine the number of messages to send, their lengths */
4169   /*---------------------------------------------------------*/
4170   len_s = merge->len_s;
4171 
4172   len          = 0; /* length of buf_si[] */
4173   merge->nsend = 0;
4174   for (proc=0; proc<size; proc++) {
4175     len_si[proc] = 0;
4176     if (proc == rank) {
4177       len_s[proc] = 0;
4178     } else {
4179       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4180       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4181     }
4182     if (len_s[proc]) {
4183       merge->nsend++;
4184       nrows = 0;
4185       for (i=owners[proc]; i<owners[proc+1]; i++) {
4186         if (ai[i+1] > ai[i]) nrows++;
4187       }
4188       len_si[proc] = 2*(nrows+1);
4189       len         += len_si[proc];
4190     }
4191   }
4192 
4193   /* determine the number and length of messages to receive for ij-structure */
4194   /*-------------------------------------------------------------------------*/
4195   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4196   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4197 
4198   /* post the Irecv of j-structure */
4199   /*-------------------------------*/
4200   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4201   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4202 
4203   /* post the Isend of j-structure */
4204   /*--------------------------------*/
4205   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4206 
4207   for (proc=0, k=0; proc<size; proc++) {
4208     if (!len_s[proc]) continue;
4209     i    = owners[proc];
4210     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4211     k++;
4212   }
4213 
4214   /* receives and sends of j-structure are complete */
4215   /*------------------------------------------------*/
4216   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4217   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4218 
4219   /* send and recv i-structure */
4220   /*---------------------------*/
4221   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4222   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4223 
4224   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4225   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4226   for (proc=0,k=0; proc<size; proc++) {
4227     if (!len_s[proc]) continue;
4228     /* form outgoing message for i-structure:
4229          buf_si[0]:                 nrows to be sent
4230                [1:nrows]:           row index (global)
4231                [nrows+1:2*nrows+1]: i-structure index
4232     */
4233     /*-------------------------------------------*/
4234     nrows       = len_si[proc]/2 - 1;
4235     buf_si_i    = buf_si + nrows+1;
4236     buf_si[0]   = nrows;
4237     buf_si_i[0] = 0;
4238     nrows       = 0;
4239     for (i=owners[proc]; i<owners[proc+1]; i++) {
4240       anzi = ai[i+1] - ai[i];
4241       if (anzi) {
4242         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4243         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4244         nrows++;
4245       }
4246     }
4247     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4248     k++;
4249     buf_si += len_si[proc];
4250   }
4251 
4252   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4253   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4254 
4255   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4256   for (i=0; i<merge->nrecv; i++) {
4257     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4258   }
4259 
4260   ierr = PetscFree(len_si);CHKERRQ(ierr);
4261   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4262   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4263   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4264   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4265   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4266   ierr = PetscFree(status);CHKERRQ(ierr);
4267 
4268   /* compute a local seq matrix in each processor */
4269   /*----------------------------------------------*/
4270   /* allocate bi array and free space for accumulating nonzero column info */
4271   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4272   bi[0] = 0;
4273 
4274   /* create and initialize a linked list */
4275   nlnk = N+1;
4276   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4277 
4278   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4279   len  = ai[owners[rank+1]] - ai[owners[rank]];
4280   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4281 
4282   current_space = free_space;
4283 
4284   /* determine symbolic info for each local row */
4285   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4286 
4287   for (k=0; k<merge->nrecv; k++) {
4288     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4289     nrows       = *buf_ri_k[k];
4290     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4291     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4292   }
4293 
4294   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4295   len  = 0;
4296   for (i=0; i<m; i++) {
4297     bnzi = 0;
4298     /* add local non-zero cols of this proc's seqmat into lnk */
4299     arow  = owners[rank] + i;
4300     anzi  = ai[arow+1] - ai[arow];
4301     aj    = a->j + ai[arow];
4302     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4303     bnzi += nlnk;
4304     /* add received col data into lnk */
4305     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4306       if (i == *nextrow[k]) { /* i-th row */
4307         anzi  = *(nextai[k]+1) - *nextai[k];
4308         aj    = buf_rj[k] + *nextai[k];
4309         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4310         bnzi += nlnk;
4311         nextrow[k]++; nextai[k]++;
4312       }
4313     }
4314     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4315 
4316     /* if free space is not available, make more free space */
4317     if (current_space->local_remaining<bnzi) {
4318       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4319       nspacedouble++;
4320     }
4321     /* copy data into free space, then initialize lnk */
4322     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4323     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4324 
4325     current_space->array           += bnzi;
4326     current_space->local_used      += bnzi;
4327     current_space->local_remaining -= bnzi;
4328 
4329     bi[i+1] = bi[i] + bnzi;
4330   }
4331 
4332   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4333 
4334   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4335   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4336   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4337 
4338   /* create symbolic parallel matrix B_mpi */
4339   /*---------------------------------------*/
4340   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4341   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4342   if (n==PETSC_DECIDE) {
4343     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4344   } else {
4345     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4346   }
4347   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4348   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4349   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4350   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4351   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4352 
4353   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4354   B_mpi->assembled    = PETSC_FALSE;
4355   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4356   merge->bi           = bi;
4357   merge->bj           = bj;
4358   merge->buf_ri       = buf_ri;
4359   merge->buf_rj       = buf_rj;
4360   merge->coi          = NULL;
4361   merge->coj          = NULL;
4362   merge->owners_co    = NULL;
4363 
4364   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4365 
4366   /* attach the supporting struct to B_mpi for reuse */
4367   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4368   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4369   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4370   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4371   *mpimat = B_mpi;
4372 
4373   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4374   PetscFunctionReturn(0);
4375 }
4376 
4377 #undef __FUNCT__
4378 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4379 /*@C
4380       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4381                  matrices from each process
4382 
4383     Collective on MPI_Comm
4384 
4385    Input Parameters:
4386 +    comm - the communicator on which the parallel matrix will live
4387 .    seqmat - the input sequential matrix (one per process)
4388 .    m - number of local rows (or PETSC_DECIDE)
4389 .    n - number of local columns (or PETSC_DECIDE)
4390 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4391 
4392    Output Parameter:
4393 .    mpimat - the parallel matrix generated
4394 
4395     Level: advanced
4396 
4397    Notes:
4398      The dimensions of the sequential matrix on each process MUST be the same.
4399      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4400      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
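
     A typical calling sequence, sketched (assumes each process has assembled
     its own SeqAIJ contribution in seqmat):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... change numerical values in seqmat, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve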
4401 @*/
4402 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4403 {
4404   PetscErrorCode ierr;
4405   PetscMPIInt    size;
4406 
4407   PetscFunctionBegin;
4408   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4409   if (size == 1) {
4410     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4411     if (scall == MAT_INITIAL_MATRIX) {
4412       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4413     } else {
4414       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4415     }
4416     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4417     PetscFunctionReturn(0);
4418   }
4419   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4420   if (scall == MAT_INITIAL_MATRIX) {
4421     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4422   }
4423   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4424   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 #undef __FUNCT__
4429 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4430 /*@
4431      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4432           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4433           with MatGetSize()
4434 
4435     Not Collective
4436 
4437    Input Parameters:
4438 +    A - the matrix
4439 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4440 
4441    Output Parameter:
4442 .    A_loc - the local sequential matrix generated
4443 
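    Example usage, a sketch (assumes A is an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc; after A's values change, refresh with ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
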
4444     Level: developer
4445 
4446 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4447 
4448 @*/
4449 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4450 {
4451   PetscErrorCode ierr;
4452   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4453   Mat_SeqAIJ     *mat,*a,*b;
4454   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4455   MatScalar      *aa,*ba,*cam;
4456   PetscScalar    *ca;
4457   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4458   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4459   PetscBool      match;
4460   MPI_Comm       comm;
4461   PetscMPIInt    size;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4465   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4466   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4468   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4469 
4470   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4471   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4472   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4473   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4474   aa = a->a; ba = b->a;
4475   if (scall == MAT_INITIAL_MATRIX) {
4476     if (size == 1) {
4477       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4478       PetscFunctionReturn(0);
4479     }
4480 
4481     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4482     ci[0] = 0;
4483     for (i=0; i<am; i++) {
4484       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4485     }
4486     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4487     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4488     k    = 0;
4489     for (i=0; i<am; i++) {
4490       ncols_o = bi[i+1] - bi[i];
4491       ncols_d = ai[i+1] - ai[i];
4492       /* off-diagonal portion of A */
4493       for (jo=0; jo<ncols_o; jo++) {
4494         col = cmap[*bj];
4495         if (col >= cstart) break;
4496         cj[k]   = col; bj++;
4497         ca[k++] = *ba++;
4498       }
4499       /* diagonal portion of A */
4500       for (j=0; j<ncols_d; j++) {
4501         cj[k]   = cstart + *aj++;
4502         ca[k++] = *aa++;
4503       }
4504       /* off-diagonal portion of A */
4505       for (j=jo; j<ncols_o; j++) {
4506         cj[k]   = cmap[*bj++];
4507         ca[k++] = *ba++;
4508       }
4509     }
4510     /* put together the new matrix */
4511     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4512     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4513     /* Since these are PETSc arrays, change flags to free them as necessary. */
4514     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4515     mat->free_a  = PETSC_TRUE;
4516     mat->free_ij = PETSC_TRUE;
4517     mat->nonew   = 0;
4518   } else if (scall == MAT_REUSE_MATRIX) {
4519     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4520     ci = mat->i; cj = mat->j; cam = mat->a;
4521     for (i=0; i<am; i++) {
4522       /* off-diagonal portion of A */
4523       ncols_o = bi[i+1] - bi[i];
4524       for (jo=0; jo<ncols_o; jo++) {
4525         col = cmap[*bj];
4526         if (col >= cstart) break;
4527         *cam++ = *ba++; bj++;
4528       }
4529       /* diagonal portion of A */
4530       ncols_d = ai[i+1] - ai[i];
4531       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4532       /* off-diagonal portion of A */
4533       for (j=jo; j<ncols_o; j++) {
4534         *cam++ = *ba++; bj++;
4535       }
4536     }
4537   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4538   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4539   PetscFunctionReturn(0);
4540 }
4541 
4542 #undef __FUNCT__
4543 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4544 /*@C
4545      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4546 
4547     Not Collective
4548 
4549    Input Parameters:
4550 +    A - the matrix
4551 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4552 -    row, col - index sets of rows and columns to extract (or NULL)
4553 
4554    Output Parameter:
4555 .    A_loc - the local sequential matrix generated
4556 
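    Example usage, a sketch (passing NULL for row and col extracts all local
    rows and all nonzero columns, as described above):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
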
4557     Level: developer
4558 
4559 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4560 
4561 @*/
4562 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4563 {
4564   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4565   PetscErrorCode ierr;
4566   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4567   IS             isrowa,iscola;
4568   Mat            *aloc;
4569   PetscBool      match;
4570 
4571   PetscFunctionBegin;
4572   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4573   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4574   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4575   if (!row) {
4576     start = A->rmap->rstart; end = A->rmap->rend;
4577     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4578   } else {
4579     isrowa = *row;
4580   }
4581   if (!col) {
4582     start = A->cmap->rstart;
4583     cmap  = a->garray;
4584     nzA   = a->A->cmap->n;
4585     nzB   = a->B->cmap->n;
4586     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4587     ncols = 0;
4588     for (i=0; i<nzB; i++) {
4589       if (cmap[i] < start) idx[ncols++] = cmap[i];
4590       else break;
4591     }
4592     imark = i;
4593     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4594     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4595     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4596   } else {
4597     iscola = *col;
4598   }
4599   if (scall != MAT_INITIAL_MATRIX) {
4600     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4601     aloc[0] = *A_loc;
4602   }
4603   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4604   *A_loc = aloc[0];
4605   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4606   if (!row) {
4607     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4608   }
4609   if (!col) {
4610     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4611   }
4612   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4613   PetscFunctionReturn(0);
4614 }
4615 
4616 #undef __FUNCT__
4617 #define __FUNCT__ "MatGetBrowsOfAcols"
4618 /*@C
4619     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4620 
4621     Collective on Mat
4622 
4623    Input Parameters:
4624 +    A,B - the matrices in MPIAIJ format
4625 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4626 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4627 
4628    Output Parameters:
4629 +    rowb, colb - index sets of rows and columns of B to extract
4630 -    B_seq - the sequential matrix generated
4631 
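    Example usage, a sketch (on the first call the index sets are created and
    returned; they must be passed back in for MAT_REUSE_MATRIX):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... after B's values change, with the same nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
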
4632     Level: developer
4633 
4634 @*/
4635 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4636 {
4637   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4638   PetscErrorCode ierr;
4639   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4640   IS             isrowb,iscolb;
4641   Mat            *bseq=NULL;
4642 
4643   PetscFunctionBegin;
4644   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4645     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4646   }
4647   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4648 
4649   if (scall == MAT_INITIAL_MATRIX) {
4650     start = A->cmap->rstart;
4651     cmap  = a->garray;
4652     nzA   = a->A->cmap->n;
4653     nzB   = a->B->cmap->n;
4654     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4655     ncols = 0;
4656     for (i=0; i<nzB; i++) {  /* row < local row index */
4657       if (cmap[i] < start) idx[ncols++] = cmap[i];
4658       else break;
4659     }
4660     imark = i;
4661     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4662     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4663     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4664     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4665   } else {
4666     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4667     isrowb  = *rowb; iscolb = *colb;
4668     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4669     bseq[0] = *B_seq;
4670   }
4671   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4672   *B_seq = bseq[0];
4673   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4674   if (!rowb) {
4675     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4676   } else {
4677     *rowb = isrowb;
4678   }
4679   if (!colb) {
4680     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4681   } else {
4682     *colb = iscolb;
4683   }
4684   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 #undef __FUNCT__
4689 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4690 /*
4691     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4692     of the OFF-DIAGONAL portion of the local part of A
4693 
4694     Collective on Mat
4695 
4696    Input Parameters:
4697 +    A,B - the matrices in MPIAIJ format
4698 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4699 
4700    Output Parameters:
4701 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4702 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4703 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4704 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4705 
4706     Level: developer
4707 
4708 */
4709 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4710 {
4711   VecScatter_MPI_General *gen_to,*gen_from;
4712   PetscErrorCode         ierr;
4713   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4714   Mat_SeqAIJ             *b_oth;
4715   VecScatter             ctx =a->Mvctx;
4716   MPI_Comm               comm;
4717   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4718   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4719   PetscScalar            *rvalues,*svalues;
4720   MatScalar              *b_otha,*bufa,*bufA;
4721   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4722   MPI_Request            *rwaits = NULL,*swaits = NULL;
4723   MPI_Status             *sstatus,rstatus;
4724   PetscMPIInt            jj,size;
4725   PetscInt               *cols,sbs,rbs;
4726   PetscScalar            *vals;
4727 
4728   PetscFunctionBegin;
4729   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4730   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4731 
4732   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4733     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4734   }
4735   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4736   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4737 
4738   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4739   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4740   rvalues  = gen_from->values; /* holds the length of receiving row */
4741   svalues  = gen_to->values;   /* holds the length of sending row */
4742   nrecvs   = gen_from->n;
4743   nsends   = gen_to->n;
4744 
4745   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4746   srow    = gen_to->indices;    /* local row index to be sent */
4747   sstarts = gen_to->starts;
4748   sprocs  = gen_to->procs;
4749   sstatus = gen_to->sstatus;
4750   sbs     = gen_to->bs;
4751   rstarts = gen_from->starts;
4752   rprocs  = gen_from->procs;
4753   rbs     = gen_from->bs;
4754 
4755   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4756   if (scall == MAT_INITIAL_MATRIX) {
4757     /* i-array */
4758     /*---------*/
4759     /*  post receives */
4760     for (i=0; i<nrecvs; i++) {
4761       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4762       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4763       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4764     }
4765 
4766     /* pack the outgoing message */
4767     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4768 
4769     sstartsj[0] = 0;
4770     rstartsj[0] = 0;
4771     len         = 0; /* total length of j or a array to be sent */
4772     k           = 0;
4773     for (i=0; i<nsends; i++) {
4774       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4775       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4776       for (j=0; j<nrows; j++) {
4777         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4778         for (l=0; l<sbs; l++) {
4779           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4780 
4781           rowlen[j*sbs+l] = ncols;
4782 
4783           len += ncols;
4784           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4785         }
4786         k++;
4787       }
4788       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4789 
4790       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4791     }
4792     /* recvs and sends of i-array are completed */
4793     i = nrecvs;
4794     while (i--) {
4795       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4796     }
4797     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4798 
4799     /* allocate buffers for sending j and a arrays */
4800     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4801     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4802 
4803     /* create i-array of B_oth */
4804     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4805 
4806     b_othi[0] = 0;
4807     len       = 0; /* total length of j or a array to be received */
4808     k         = 0;
4809     for (i=0; i<nrecvs; i++) {
4810       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4811       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4812       for (j=0; j<nrows; j++) {
4813         b_othi[k+1] = b_othi[k] + rowlen[j];
4814         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4815         k++;
4816       }
4817       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4818     }
4819 
4820    /* allocate space for the j and a arrays of B_oth */
4821     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4822     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4823 
4824     /* j-array */
4825     /*---------*/
4826     /*  post receives of j-array */
4827     for (i=0; i<nrecvs; i++) {
4828       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4829       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4830     }
4831 
4832     /* pack the outgoing message j-array */
4833     k = 0;
4834     for (i=0; i<nsends; i++) {
4835       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4836       bufJ  = bufj+sstartsj[i];
4837       for (j=0; j<nrows; j++) {
4838         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4839         for (ll=0; ll<sbs; ll++) {
4840           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4841           for (l=0; l<ncols; l++) {
4842             *bufJ++ = cols[l];
4843           }
4844           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4845         }
4846       }
4847       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4848     }
4849 
4850     /* recvs and sends of j-array are completed */
4851     i = nrecvs;
4852     while (i--) {
4853       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4854     }
4855     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4856   } else if (scall == MAT_REUSE_MATRIX) {
4857     sstartsj = *startsj_s;
4858     rstartsj = *startsj_r;
4859     bufa     = *bufa_ptr;
4860     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4861     b_otha   = b_oth->a;
4862   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4863 
4864   /* a-array */
4865   /*---------*/
4866   /*  post receives of a-array */
4867   for (i=0; i<nrecvs; i++) {
4868     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4869     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4870   }
4871 
4872   /* pack the outgoing message a-array */
4873   k = 0;
4874   for (i=0; i<nsends; i++) {
4875     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4876     bufA  = bufa+sstartsj[i];
4877     for (j=0; j<nrows; j++) {
4878       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4879       for (ll=0; ll<sbs; ll++) {
4880         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4881         for (l=0; l<ncols; l++) {
4882           *bufA++ = vals[l];
4883         }
4884         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4885       }
4886     }
4887     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4888   }
4889   /* recvs and sends of a-array are completed */
4890   i = nrecvs;
4891   while (i--) {
4892     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4893   }
4894   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4895   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4896 
4897   if (scall == MAT_INITIAL_MATRIX) {
4898     /* put together the new matrix */
4899     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4900 
4901     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4902     /* Since these are PETSc arrays, change flags to free them as necessary. */
4903     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4904     b_oth->free_a  = PETSC_TRUE;
4905     b_oth->free_ij = PETSC_TRUE;
4906     b_oth->nonew   = 0;
4907 
4908     ierr = PetscFree(bufj);CHKERRQ(ierr);
4909     if (!startsj_s || !bufa_ptr) {
4910       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4911       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4912     } else {
4913       *startsj_s = sstartsj;
4914       *startsj_r = rstartsj;
4915       *bufa_ptr  = bufa;
4916     }
4917   }
4918   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4919   PetscFunctionReturn(0);
4920 }
4921 
4922 #undef __FUNCT__
4923 #define __FUNCT__ "MatGetCommunicationStructs"
4924 /*@C
4925   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4926 
4927   Not Collective
4928 
4929   Input Parameter:
4930 . A - The matrix in mpiaij format
4931 
4932   Output Parameters:
4933 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4934 . colmap - A map from global column index to local index into lvec
4935 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4936 
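  Example usage, a sketch (all three output pointers must be valid; the colmap
  type depends on whether PETSc was configured with ctable support):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
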
4937   Level: developer
4938 
4939 @*/
4940 #if defined(PETSC_USE_CTABLE)
4941 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4942 #else
4943 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4944 #endif
4945 {
4946   Mat_MPIAIJ *a;
4947 
4948   PetscFunctionBegin;
4949   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4950   PetscValidPointer(lvec, 2);
4951   PetscValidPointer(colmap, 3);
4952   PetscValidPointer(multScatter, 4);
4953   a = (Mat_MPIAIJ*) A->data;
4954   if (lvec) *lvec = a->lvec;
4955   if (colmap) *colmap = a->colmap;
4956   if (multScatter) *multScatter = a->Mvctx;
4957   PetscFunctionReturn(0);
4958 }
4959 
4960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4963 #if defined(PETSC_HAVE_ELEMENTAL)
4964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4965 #endif
4966 
4967 #undef __FUNCT__
4968 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4969 /*
4970     Computes (B'*A')' since computing B*A directly is untenable
4971 
4972                n                       p                          p
4973         (              )       (              )         (                  )
4974       m (      A       )  *  n (       B      )   =   m (         C        )
4975         (              )       (              )         (                  )
4976 
4977 */
4978 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4979 {
4980   PetscErrorCode ierr;
4981   Mat            At,Bt,Ct;
4982 
4983   PetscFunctionBegin;
4984   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4985   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4986   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4987   ierr = MatDestroy(&At);CHKERRQ(ierr);
4988   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4989   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4990   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4991   PetscFunctionReturn(0);
4992 }
4993 
4994 #undef __FUNCT__
4995 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4996 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4997 {
4998   PetscErrorCode ierr;
4999   PetscInt       m=A->rmap->n,n=B->cmap->n;
5000   Mat            Cmat;
5001 
5002   PetscFunctionBegin;
5003   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5004   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5005   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5006   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5007   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5008   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5009   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5010   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5011 
5012   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5013 
5014   *C = Cmat;
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 /* ----------------------------------------------------------------*/
5019 #undef __FUNCT__
5020 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5021 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5022 {
5023   PetscErrorCode ierr;
5024 
5025   PetscFunctionBegin;
5026   if (scall == MAT_INITIAL_MATRIX) {
5027     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5028     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5029     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5030   }
5031   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5032   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5033   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5034   PetscFunctionReturn(0);
5035 }
5036 
5037 /*MC
5038    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5039 
5040    Options Database Keys:
5041 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5042 
5043   Level: beginner
5044 
5045 .seealso: MatCreateAIJ()
5046 M*/
5047 
5048 #undef __FUNCT__
5049 #define __FUNCT__ "MatCreate_MPIAIJ"
5050 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5051 {
5052   Mat_MPIAIJ     *b;
5053   PetscErrorCode ierr;
5054   PetscMPIInt    size;
5055 
5056   PetscFunctionBegin;
5057   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5058 
5059   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5060   B->data       = (void*)b;
5061   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5062   B->assembled  = PETSC_FALSE;
5063   B->insertmode = NOT_SET_VALUES;
5064   b->size       = size;
5065 
5066   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5067 
5068   /* build cache for off array entries formed */
5069   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5070 
5071   b->donotstash  = PETSC_FALSE;
5072   b->colmap      = 0;
5073   b->garray      = 0;
5074   b->roworiented = PETSC_TRUE;
5075 
5076   /* stuff used for matrix vector multiply */
5077   b->lvec  = NULL;
5078   b->Mvctx = NULL;
5079 
5080   /* stuff for MatGetRow() */
5081   b->rowindices   = 0;
5082   b->rowvalues    = 0;
5083   b->getrowactive = PETSC_FALSE;
5084 
5085   /* flexible pointer used in CUSP/CUSPARSE classes */
5086   b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
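
/*
   Usage sketch (illustrative only, not part of the library): applications do not call
   MatCreate_MPIAIJ() directly; it runs when the type is set. With placeholder sizes m,n,M,N
   and preallocation counts dnz,onz supplied by the caller:

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);                          <-- dispatches to MatCreate_MPIAIJ()
      ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);

   The PetscObjectComposeFunction() calls above register type-specific implementations that
   other code can retrieve with PetscObjectQueryFunction(), avoiding hard link-time
   dependencies on this file.
*/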

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices for the "diagonal" portion; these must be local, i.e. relative to the first column of the diagonal block
.  a - matrix values for the "diagonal" portion
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices for the "off-diagonal" portion; these must be global
-  oa - matrix values for the "off-diagonal" portion

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i, j, oi, and oj indices are zero-based.

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix; a worked
       example is sketched below.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

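   Example:
       An illustrative sketch (the numeric values are hypothetical, chosen only to show the layout).
       Consider a 4x4 matrix split across two processes, each owning two rows and two columns, where
       rank 0 owns rows 0-1 and diagonal-block columns 0-1:

.vb
         global matrix          rank 0 input arrays
         1 2 | 0 3              i[]  = {0,2,4};  j[]  = {0,1,0,1};  a[]  = {1,2,4,5};  (local column indices)
         4 5 | 6 0              oi[] = {0,1,2};  oj[] = {3,2};      oa[] = {3,6};      (global column indices)
         ----+----
         0 7 | 8 0
         0 0 | 9 10

         ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve

       Because the arrays are not copied, in real code they must remain valid until the matrix is destroyed.
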
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* the "diagonal" block A uses local column indices, the "off-diagonal" block B global ones */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
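
/*
   Sketch of the MatSetValues()-based assembly recommended in the notes above (illustrative only;
   nrows, ncols, rows, cols, and vals are placeholders for application-provided data):

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      for (i=0; i<nrows; i++) {
        ierr = MatSetValues(A,1,&rows[i],ncols[i],cols[i],vals[i],INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   This pattern works with any matrix type and leaves storage management to PETSc.
*/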

/*
    Special version of MatSetValues() for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Redefine these macros so they can be used in a function returning void: CHKERRQ() normally
   expands to a return statement carrying an error code, which is invalid here, so abort instead */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note ba must be refreshed before ap2 is recomputed from it */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}