xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision eb023ebf4bbf4f9f7b6cdfd57aaf1391460201fc)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity; a usage sketch follows this manual page.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
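
/*
   A minimal usage sketch (illustrative, not part of this source), following the
   recommendation above: call both preallocation routines and the one that does not
   match the communicator size is simply ignored.  Sizes and per-row nonzero counts
   here are placeholders.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     MatDestroy(&A);
*/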
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34    for communicators controlling multiple processes.  It is recommended that you call both of
35    the above preallocation routines for simplicity; a usage sketch follows this manual page.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
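
/*
   Illustrative sketch (not part of this source): the CRL variant is typically selected at
   runtime with the option -mat_type aijcrl, which MatSetFromOptions() picks up from the
   options database; the usual AIJ preallocation calls then apply as described above.
   Sizes and nonzero counts are placeholders.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/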
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries per row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries per row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
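
/*
   Illustrative sketch (not part of this source) of a caller of the routine above: gseq is
   a square sequential AIJ matrix whose entries are significant on rank 0, and mloc is the
   number of rows this rank should own (both names are placeholders).  The second call with
   MAT_REUSE_MATRIX only moves over new numerical values.

     Mat dist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_INITIAL_MATRIX,&dist);
     ... use dist ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mloc,MAT_REUSE_MATRIX,&dist);
     MatDestroy(&dist);
*/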
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 has an order N integer array, but access is fast).  A lookup sketch follows the routine below.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
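
/*
   Lookup sketch (illustrative, not part of this source) showing how the colmap built above
   is consulted; this mirrors MatSetValues_MPIAIJ() below.  A global column number gcol is
   translated to a local column index lcol in the off-diagonal block, with a negative result
   meaning the column is not present (gcol and lcol are placeholder names).

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/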
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled; if so we must
742      also disassemble ourselves, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
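
/*
   Caller-side sketch (illustrative, not part of this source) exercising the routine above
   through the public MatZeroRows() interface: zero two global rows, place 1.0 on their
   diagonal, and fix the right-hand side b from the known values in x (A, x, and b are
   placeholder objects).

     PetscInt rows[2] = {0, 5};
     MatZeroRows(A,2,rows,1.0,x,b);
*/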
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added into yy until the next line */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: the diagonal blocks must be transposes of each other */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* this processor needs space as large as the largest processor needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
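
/*
   A minimal caller-side sketch (not part of this file) of how the binary path above is reached;
   the file name "matrix.dat" is only an illustrative choice.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);              (on more than one process this ends up in MatView_MPIAIJ_Binary())
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/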
1325 
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Everyone has to participate in the viewing call since the graphics waits are
1444        synchronized across all processes that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
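
/*
   A minimal sketch (assumed solver setup, not taken from this file) of how MatSOR_MPIAIJ() is
   normally exercised, namely through the SOR preconditioner applying local sweeps on each process:

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);   (matches the first branch above)
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/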
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatPermute_MPIAIJ"
1578 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1579 {
1580   Mat            aA,aB,Aperm;
1581   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1582   PetscScalar    *aa,*ba;
1583   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1584   PetscSF        rowsf,sf;
1585   IS             parcolp = NULL;
1586   PetscBool      done;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1591   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1592   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1593   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1594 
1595   /* Invert row permutation to find out where my rows should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1599   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1600   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1602 
1603   /* Invert column permutation to find out where my columns should go */
1604   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1605   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1606   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1607   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1608   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1613   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1614   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1615 
1616   /* Find out where my gcols should go */
1617   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1618   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1620   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1621   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1622   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1624   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1625 
1626   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   for (i=0; i<m; i++) {
1630     PetscInt row = rdest[i],rowner;
1631     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       PetscInt cowner,col = cdest[aj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638     for (j=bi[i]; j<bi[i+1]; j++) {
1639       PetscInt cowner,col = gcdest[bj[j]];
1640       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1641       if (rowner == cowner) dnnz[i]++;
1642       else onnz[i]++;
1643     }
1644   }
1645   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1650 
1651   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1653   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) {
1655     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1656     PetscInt j0,rowlen;
1657     rowlen = ai[i+1] - ai[i];
1658     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the scratch length m, so insert the values in batches */
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662     rowlen = bi[i+1] - bi[i];
1663     for (j0=j=0; j<rowlen; j0=j) {
1664       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1665       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1666     }
1667   }
1668   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1671   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1673   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1674   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1675   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1676   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1677   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1678   *B = Aperm;
1679   PetscFunctionReturn(0);
1680 }
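
/*
   A minimal caller-side sketch (hypothetical index arrays newrows/newcols) for the routine above;
   rowp and colp are parallel index sets describing the desired row and column permutations.

     IS  rowp,colp;
     Mat Aperm;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocalrows,newrows,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocalcols,newcols,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/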
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1684 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1685 {
1686   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1691   if (ghosts) *ghosts = aij->garray;
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1697 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1698 {
1699   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1700   Mat            A    = mat->A,B = mat->B;
1701   PetscErrorCode ierr;
1702   PetscReal      isend[5],irecv[5];
1703 
1704   PetscFunctionBegin;
1705   info->block_size = 1.0;
1706   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1709   isend[3] = info->memory;  isend[4] = info->mallocs;
1710 
1711   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1712 
1713   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1714   isend[3] += info->memory;  isend[4] += info->mallocs;
1715   if (flag == MAT_LOCAL) {
1716     info->nz_used      = isend[0];
1717     info->nz_allocated = isend[1];
1718     info->nz_unneeded  = isend[2];
1719     info->memory       = isend[3];
1720     info->mallocs      = isend[4];
1721   } else if (flag == MAT_GLOBAL_MAX) {
1722     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1723 
1724     info->nz_used      = irecv[0];
1725     info->nz_allocated = irecv[1];
1726     info->nz_unneeded  = irecv[2];
1727     info->memory       = irecv[3];
1728     info->mallocs      = irecv[4];
1729   } else if (flag == MAT_GLOBAL_SUM) {
1730     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1731 
1732     info->nz_used      = irecv[0];
1733     info->nz_allocated = irecv[1];
1734     info->nz_unneeded  = irecv[2];
1735     info->memory       = irecv[3];
1736     info->mallocs      = irecv[4];
1737   }
1738   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1739   info->fill_ratio_needed = 0;
1740   info->factor_mallocs    = 0;
1741   PetscFunctionReturn(0);
1742 }
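
/*
   A minimal caller-side sketch for the routine above; MAT_LOCAL, MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   select which of the branches above fills in the MatInfo fields.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/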
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetOption_MPIAIJ"
1746 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1747 {
1748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   switch (op) {
1753   case MAT_NEW_NONZERO_LOCATIONS:
1754   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1755   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1756   case MAT_KEEP_NONZERO_PATTERN:
1757   case MAT_NEW_NONZERO_LOCATION_ERR:
1758   case MAT_USE_INODES:
1759   case MAT_IGNORE_ZERO_ENTRIES:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1763     break;
1764   case MAT_ROW_ORIENTED:
1765     a->roworiented = flg;
1766 
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_NEW_DIAGONALS:
1771     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1772     break;
1773   case MAT_IGNORE_OFF_PROC_ENTRIES:
1774     a->donotstash = flg;
1775     break;
1776   case MAT_SPD:
1777     A->spd_set = PETSC_TRUE;
1778     A->spd     = flg;
1779     if (flg) {
1780       A->symmetric                  = PETSC_TRUE;
1781       A->structurally_symmetric     = PETSC_TRUE;
1782       A->symmetric_set              = PETSC_TRUE;
1783       A->structurally_symmetric_set = PETSC_TRUE;
1784     }
1785     break;
1786   case MAT_SYMMETRIC:
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_STRUCTURALLY_SYMMETRIC:
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_HERMITIAN:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_SYMMETRY_ETERNAL:
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     break;
1798   default:
1799     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1800   }
1801   PetscFunctionReturn(0);
1802 }
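
/*
   A minimal caller-side sketch for the routine above, using two of the options it handles:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);   (sets the donotstash flag above)
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                       (also marks the matrix symmetric, as coded above)
*/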
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatGetRow_MPIAIJ"
1806 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1807 {
1808   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1809   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1810   PetscErrorCode ierr;
1811   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1812   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1813   PetscInt       *cmap,*idx_p;
1814 
1815   PetscFunctionBegin;
1816   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1817   mat->getrowactive = PETSC_TRUE;
1818 
1819   if (!mat->rowvalues && (idx || v)) {
1820     /*
1821         allocate enough space to hold information from the longest row.
1822     */
1823     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1824     PetscInt   max = 1,tmp;
1825     for (i=0; i<matin->rmap->n; i++) {
1826       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1827       if (max < tmp) max = tmp;
1828     }
1829     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1830   }
1831 
1832   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1833   lrow = row - rstart;
1834 
1835   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1836   if (!v)   {pvA = 0; pvB = 0;}
1837   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1838   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   nztot = nzA + nzB;
1841 
1842   cmap = mat->garray;
1843   if (v  || idx) {
1844     if (nztot) {
1845       /* Sort by increasing column numbers, assuming A and B already sorted */
1846       PetscInt imark = -1;
1847       if (v) {
1848         *v = v_p = mat->rowvalues;
1849         for (i=0; i<nzB; i++) {
1850           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1851           else break;
1852         }
1853         imark = i;
1854         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1855         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1856       }
1857       if (idx) {
1858         *idx = idx_p = mat->rowindices;
1859         if (imark > -1) {
1860           for (i=0; i<imark; i++) {
1861             idx_p[i] = cmap[cworkB[i]];
1862           }
1863         } else {
1864           for (i=0; i<nzB; i++) {
1865             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1866             else break;
1867           }
1868           imark = i;
1869         }
1870         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1871         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1872       }
1873     } else {
1874       if (idx) *idx = 0;
1875       if (v)   *v   = 0;
1876     }
1877   }
1878   *nz  = nztot;
1879   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1880   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1887 {
1888   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1889 
1890   PetscFunctionBegin;
1891   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1892   aij->getrowactive = PETSC_FALSE;
1893   PetscFunctionReturn(0);
1894 }
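
/*
   A minimal caller-side sketch of the MatGetRow()/MatRestoreRow() protocol implemented by the two
   routines above: only locally owned rows may be requested, and each MatGetRow() must be matched
   by a MatRestoreRow() before the next row is fetched.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use the ncols global column indices and values here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/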
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatNorm_MPIAIJ"
1898 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1899 {
1900   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1901   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1902   PetscErrorCode ierr;
1903   PetscInt       i,j,cstart = mat->cmap->rstart;
1904   PetscReal      sum = 0.0;
1905   MatScalar      *v;
1906 
1907   PetscFunctionBegin;
1908   if (aij->size == 1) {
1909     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1910   } else {
1911     if (type == NORM_FROBENIUS) {
1912       v = amat->a;
1913       for (i=0; i<amat->nz; i++) {
1914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1915       }
1916       v = bmat->a;
1917       for (i=0; i<bmat->nz; i++) {
1918         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1919       }
1920       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       *norm = PetscSqrtReal(*norm);
1922     } else if (type == NORM_1) { /* max column norm */
1923       PetscReal *tmp,*tmp2;
1924       PetscInt  *jj,*garray = aij->garray;
1925       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1926       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1927       *norm = 0.0;
1928       v     = amat->a; jj = amat->j;
1929       for (j=0; j<amat->nz; j++) {
1930         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1931       }
1932       v = bmat->a; jj = bmat->j;
1933       for (j=0; j<bmat->nz; j++) {
1934         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1935       }
1936       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       for (j=0; j<mat->cmap->N; j++) {
1938         if (tmp2[j] > *norm) *norm = tmp2[j];
1939       }
1940       ierr = PetscFree(tmp);CHKERRQ(ierr);
1941       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1958   }
1959   PetscFunctionReturn(0);
1960 }
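
/*
   A minimal caller-side sketch for the routine above; NORM_2 is not supported for parallel AIJ:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   (NORM_1 gives the max column sum, NORM_INFINITY the max row sum)
*/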
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatTranspose_MPIAIJ"
1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1965 {
1966   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1967   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1968   PetscErrorCode ierr;
1969   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1970   PetscInt       cstart = A->cmap->rstart,ncol;
1971   Mat            B;
1972   MatScalar      *array;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1976 
1977   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1978   ai = Aloc->i; aj = Aloc->j;
1979   bi = Bloc->i; bj = Bloc->j;
1980   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1981     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1982     PetscSFNode          *oloc;
1983     PETSC_UNUSED PetscSF sf;
1984 
1985     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1986     /* compute d_nnz for preallocation */
1987     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<ai[ma]; i++) {
1989       d_nnz[aj[i]]++;
1990       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1991     }
1992     /* compute local off-diagonal contributions */
1993     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1995     /* map those to global */
1996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1997     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1998     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1999     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2000     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2003 
2004     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2005     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2006     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2007     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2008     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2009     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2010   } else {
2011     B    = *matout;
2012     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2014   }
2015 
2016   /* copy over the A part */
2017   array = Aloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<ma; i++) {
2020     ncol = ai[i+1]-ai[i];
2021     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; aj += ncol;
2024   }
2025   aj = Aloc->j;
2026   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2027 
2028   /* copy over the B part */
2029   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2030   array = Bloc->a;
2031   row   = A->rmap->rstart;
2032   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2033   cols_tmp = cols;
2034   for (i=0; i<mb; i++) {
2035     ncol = bi[i+1]-bi[i];
2036     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2037     row++;
2038     array += ncol; cols_tmp += ncol;
2039   }
2040   ierr = PetscFree(cols);CHKERRQ(ierr);
2041 
2042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2045     *matout = B;
2046   } else {
2047     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
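
/*
   A minimal caller-side sketch for the routine above: MAT_INITIAL_MATRIX creates a new matrix,
   while MAT_REUSE_MATRIX with *matout == A performs the in-place (square matrices only) path above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/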
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2055 {
2056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2057   Mat            a    = aij->A,b = aij->B;
2058   PetscErrorCode ierr;
2059   PetscInt       s1,s2,s3;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2063   if (rr) {
2064     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2065     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2066     /* Overlap communication with computation. */
2067     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068   }
2069   if (ll) {
2070     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2071     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2072     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2073   }
2074   /* scale  the diagonal block */
2075   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2076 
2077   if (rr) {
2078     /* Do a scatter end and then right scale the off-diagonal block */
2079     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2088 {
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatEqual_MPIAIJ"
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCopy_MPIAIJ"
2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2125 
2126   PetscFunctionBegin;
2127   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2128   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2129     /* because of the column compression in the off-process part of the matrix a->B,
2130        the number of columns in a->B and b->B may differ, hence we cannot call
2131        MatCopy() directly on the two parts. If needed, a copy more efficient than
2132        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2133        and then copying the submatrices */
2134     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2135   } else {
2136     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2137     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUp_MPIAIJ"
2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2145 {
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*
2154    Computes the number of nonzeros per row needed for preallocation when X and Y
2155    have different nonzero structure.
2156 */
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2160 {
2161   PetscInt       i,j,k,nzx,nzy;
2162 
2163   PetscFunctionBegin;
2164   /* Set the number of nonzeros in the new matrix */
2165   for (i=0; i<m; i++) {
2166     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2167     nzx = xi[i+1] - xi[i];
2168     nzy = yi[i+1] - yi[i];
2169     nnz[i] = 0;
2170     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2171       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2172       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2173       nnz[i]++;
2174     }
2175     for (; k<nzy; k++) nnz[i]++;
2176   }
2177   PetscFunctionReturn(0);
2178 }
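
/*
   A worked example (hypothetical data) of the merge count above, taking xltog and yltog as the
   identity: if row i of X has columns {1,4,7} and row i of Y has columns {2,4}, the sorted union
   is {1,2,4,7}, so nnz[i] = 4; the shared column 4 is counted once because of the duplicate skip.
*/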
2179 
2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscErrorCode ierr;
2186   PetscInt       m = Y->rmap->N;
2187   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2188   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2201   PetscBLASInt   bnz,one=1;
2202   Mat_SeqAIJ     *x,*y;
2203 
2204   PetscFunctionBegin;
2205   if (str == SAME_NONZERO_PATTERN) {
2206     PetscScalar alpha = a;
2207     x    = (Mat_SeqAIJ*)xx->A->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     y    = (Mat_SeqAIJ*)yy->A->data;
2210     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2211     x    = (Mat_SeqAIJ*)xx->B->data;
2212     y    = (Mat_SeqAIJ*)yy->B->data;
2213     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2214     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2215     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2216   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2217     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2218   } else {
2219     Mat      B;
2220     PetscInt *nnz_d,*nnz_o;
2221     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2222     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2223     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2224     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2225     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2226     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2227     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2230     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2231     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2232     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
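
/*
   A minimal caller-side sketch for the routine above; the structure flag selects the branch taken:

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        (BLAS axpy on the A and B parts)
     ierr = MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      (falls back to MatAXPY_Basic())
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   (preallocates a new matrix and replaces Y)
*/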
2238 
2239 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatConjugate_MPIAIJ"
2243 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2244 {
2245 #if defined(PETSC_USE_COMPLEX)
2246   PetscErrorCode ierr;
2247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2251   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2252 #else
2253   PetscFunctionBegin;
2254 #endif
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatRealPart_MPIAIJ"
2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2267   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2280   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2286 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290   PetscInt       i,*idxb = 0;
2291   PetscScalar    *va,*vb;
2292   Vec            vtmp;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2296   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2297   if (idx) {
2298     for (i=0; i<A->rmap->n; i++) {
2299       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2300     }
2301   }
2302 
2303   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2304   if (idx) {
2305     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2306   }
2307   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2308   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2309 
2310   for (i=0; i<A->rmap->n; i++) {
2311     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2312       va[i] = vb[i];
2313       if (idx) idx[i] = a->garray[idxb[i]];
2314     }
2315   }
2316 
2317   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2318   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2319   ierr = PetscFree(idxb);CHKERRQ(ierr);
2320   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
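
/*
   A minimal caller-side sketch (the variable nlocalrows is assumed to come from MatGetLocalSize())
   for the routine above: v receives, for each local row, the entry of largest absolute value, and
   the optional idx array the global column at which it occurs.

     Vec      rowmax;
     PetscInt *maxcols;
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc1(nlocalrows,&maxcols);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,maxcols);CHKERRQ(ierr);
*/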
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2326 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2327 {
2328   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,*idxb = 0;
2331   PetscScalar    *va,*vb;
2332   Vec            vtmp;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2337   if (idx) {
2338     for (i=0; i<A->rmap->n; i++) {
2339       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2340     }
2341   }
2342 
2343   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2344   if (idx) {
2345     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2346   }
2347   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2348   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2349 
2350   for (i=0; i<A->rmap->n; i++) {
2351     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2352       va[i] = vb[i];
2353       if (idx) idx[i] = a->garray[idxb[i]];
2354     }
2355   }
2356 
2357   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2358   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2359   ierr = PetscFree(idxb);CHKERRQ(ierr);
2360   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2366 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2367 {
2368   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2369   PetscInt       n      = A->rmap->n;
2370   PetscInt       cstart = A->cmap->rstart;
2371   PetscInt       *cmap  = mat->garray;
2372   PetscInt       *diagIdx, *offdiagIdx;
2373   Vec            diagV, offdiagV;
2374   PetscScalar    *a, *diagA, *offdiagA;
2375   PetscInt       r;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2385   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2386   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2387   for (r = 0; r < n; ++r) {
2388     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2389       a[r]   = diagA[r];
2390       idx[r] = cstart + diagIdx[r];
2391     } else {
2392       a[r]   = offdiagA[r];
2393       idx[r] = cmap[offdiagIdx[r]];
2394     }
2395   }
2396   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2400   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2401   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2407 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2408 {
2409   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2410   PetscInt       n      = A->rmap->n;
2411   PetscInt       cstart = A->cmap->rstart;
2412   PetscInt       *cmap  = mat->garray;
2413   PetscInt       *diagIdx, *offdiagIdx;
2414   Vec            diagV, offdiagV;
2415   PetscScalar    *a, *diagA, *offdiagA;
2416   PetscInt       r;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2421   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2423   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2425   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2426   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2427   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2428   for (r = 0; r < n; ++r) {
2429     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2430       a[r]   = diagA[r];
2431       idx[r] = cstart + diagIdx[r];
2432     } else {
2433       a[r]   = offdiagA[r];
2434       idx[r] = cmap[offdiagIdx[r]];
2435     }
2436   }
2437   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2441   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2442   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2448 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2449 {
2450   PetscErrorCode ierr;
2451   Mat            *dummy;
2452 
2453   PetscFunctionBegin;
2454   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2455   *newmat = *dummy;
2456   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2462 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2463 {
2464   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2474 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2475 {
2476   PetscErrorCode ierr;
2477   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2478 
2479   PetscFunctionBegin;
2480   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2481   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2482   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2483   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatShift_MPIAIJ"
2489 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2490 {
2491   PetscErrorCode ierr;
2492   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2493   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2494 
2495   PetscFunctionBegin;
2496   if (!Y->preallocated) {
2497     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2498   } else if (!aij->nz) {
2499     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2500   }
2501   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 /* -------------------------------------------------------------------*/
2506 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2507                                        MatGetRow_MPIAIJ,
2508                                        MatRestoreRow_MPIAIJ,
2509                                        MatMult_MPIAIJ,
2510                                 /* 4*/ MatMultAdd_MPIAIJ,
2511                                        MatMultTranspose_MPIAIJ,
2512                                        MatMultTransposeAdd_MPIAIJ,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                 /*10*/ 0,
2517                                        0,
2518                                        0,
2519                                        MatSOR_MPIAIJ,
2520                                        MatTranspose_MPIAIJ,
2521                                 /*15*/ MatGetInfo_MPIAIJ,
2522                                        MatEqual_MPIAIJ,
2523                                        MatGetDiagonal_MPIAIJ,
2524                                        MatDiagonalScale_MPIAIJ,
2525                                        MatNorm_MPIAIJ,
2526                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2527                                        MatAssemblyEnd_MPIAIJ,
2528                                        MatSetOption_MPIAIJ,
2529                                        MatZeroEntries_MPIAIJ,
2530                                 /*24*/ MatZeroRows_MPIAIJ,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                 /*29*/ MatSetUp_MPIAIJ,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                 /*34*/ MatDuplicate_MPIAIJ,
2541                                        0,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*39*/ MatAXPY_MPIAIJ,
2546                                        MatGetSubMatrices_MPIAIJ,
2547                                        MatIncreaseOverlap_MPIAIJ,
2548                                        MatGetValues_MPIAIJ,
2549                                        MatCopy_MPIAIJ,
2550                                 /*44*/ MatGetRowMax_MPIAIJ,
2551                                        MatScale_MPIAIJ,
2552                                        MatShift_MPIAIJ,
2553                                        MatDiagonalSet_MPIAIJ,
2554                                        MatZeroRowsColumns_MPIAIJ,
2555                                 /*49*/ MatSetRandom_MPIAIJ,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2561                                        0,
2562                                        MatSetUnfactored_MPIAIJ,
2563                                        MatPermute_MPIAIJ,
2564                                        0,
2565                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2566                                        MatDestroy_MPIAIJ,
2567                                        MatView_MPIAIJ,
2568                                        0,
2569                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2570                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2571                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2576                                        MatGetRowMinAbs_MPIAIJ,
2577                                        0,
2578                                        MatSetColoring_MPIAIJ,
2579                                        0,
2580                                        MatSetValuesAdifor_MPIAIJ,
2581                                 /*75*/ MatFDColoringApply_AIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                        MatFindZeroDiagonals_MPIAIJ,
2586                                 /*80*/ 0,
2587                                        0,
2588                                        0,
2589                                 /*83*/ MatLoad_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2596                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2597                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2598                                        MatPtAP_MPIAIJ_MPIAIJ,
2599                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2600                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*99*/ 0,
2606                                        0,
2607                                        0,
2608                                        MatConjugate_MPIAIJ,
2609                                        0,
2610                                 /*104*/MatSetValuesRow_MPIAIJ,
2611                                        MatRealPart_MPIAIJ,
2612                                        MatImaginaryPart_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*109*/0,
2616                                        0,
2617                                        MatGetRowMin_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2621                                        0,
2622                                        MatGetGhosts_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                 /*119*/0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        MatGetMultiProcBlock_MPIAIJ,
2630                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2631                                        MatGetColumnNorms_MPIAIJ,
2632                                        MatInvertBlockDiagonal_MPIAIJ,
2633                                        0,
2634                                        MatGetSubMatricesMPI_MPIAIJ,
2635                                 /*129*/0,
2636                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2637                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2638                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2639                                        0,
2640                                 /*134*/0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*139*/0,
2646                                        0,
2647                                        0,
2648                                        MatFDColoringSetUp_MPIXAIJ,
2649                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2650                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2651 };
2652 
2653 /* ----------------------------------------------------------------------------------------*/
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2657 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2658 {
2659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2670 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2671 {
2672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2677   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2683 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2684 {
2685   Mat_MPIAIJ     *b;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2690   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2691   b = (Mat_MPIAIJ*)B->data;
2692 
2693   if (!B->preallocated) {
2694     /* Explicitly create 2 MATSEQAIJ matrices. */
2695     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2696     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2697     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2698     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2699     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2700     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2701     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2702     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2703     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2704     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2708   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2709   B->preallocated = PETSC_TRUE;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2715 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2716 {
2717   Mat            mat;
2718   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   *newmat = 0;
2723   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2724   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2725   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2726   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2727   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2728   a       = (Mat_MPIAIJ*)mat->data;
2729 
2730   mat->factortype   = matin->factortype;
2731   mat->assembled    = PETSC_TRUE;
2732   mat->insertmode   = NOT_SET_VALUES;
2733   mat->preallocated = PETSC_TRUE;
2734 
2735   a->size         = oldmat->size;
2736   a->rank         = oldmat->rank;
2737   a->donotstash   = oldmat->donotstash;
2738   a->roworiented  = oldmat->roworiented;
2739   a->rowindices   = 0;
2740   a->rowvalues    = 0;
2741   a->getrowactive = PETSC_FALSE;
2742 
2743   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2744   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2745 
2746   if (oldmat->colmap) {
2747 #if defined(PETSC_USE_CTABLE)
2748     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2749 #else
2750     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2751     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2752     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2753 #endif
2754   } else a->colmap = 0;
2755   if (oldmat->garray) {
2756     PetscInt len;
2757     len  = oldmat->B->cmap->n;
2758     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2759     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2760     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2761   } else a->garray = 0;
2762 
2763   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2764   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2765   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2766   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2767   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2768   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2769   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2770   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2771   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2772   *newmat = mat;
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatLoad_MPIAIJ"
2780 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2781 {
2782   PetscScalar    *vals,*svals;
2783   MPI_Comm       comm;
2784   PetscErrorCode ierr;
2785   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2786   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2787   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2788   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2789   PetscInt       cend,cstart,n,*rowners;
2790   int            fd;
2791   PetscInt       bs = newMat->rmap->bs;
2792 
2793   PetscFunctionBegin;
2794   /* force binary viewer to load .info file if it has not yet done so */
2795   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2796   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2798   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2799   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2800   if (!rank) {
2801     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2802     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in everyone else's column indices and ship them off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices*/
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in the other processes' values and ship them out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values*/
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2983 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
2984 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2985 {
2986   PetscErrorCode ierr;
2987   IS             iscol_local;
2988   PetscInt       csize;
2989 
2990   PetscFunctionBegin;
2991   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2992   if (call == MAT_REUSE_MATRIX) {
2993     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2994     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2995   } else {
2996     /* check if we are grabbing all columns*/
2997     PetscBool    isstride;
2998     PetscMPIInt  lisstride = 0,gisstride;
2999     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3000     if (isstride) {
3001       PetscInt  start,len,mstart,mlen;
3002       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3003       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3004       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3005       if (mstart == start && mlen-mstart == len) lisstride = 1;
3006     }
3007     ierr = MPI_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3008     if (gisstride) {
3009       PetscInt N;
3010       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3011       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3012       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3013       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3014     } else {
3015       PetscInt cbs;
3016       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3017       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3018       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3019     }
3020   }
3021   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3022   if (call == MAT_INITIAL_MATRIX) {
3023     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3024     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3025   }
3026   PetscFunctionReturn(0);
3027 }
3028 
3029 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3032 /*
3033     Not great since it makes two copies of the submatrix: first a SeqAIJ
3034   in local form, and then the end result by concatenating the local matrices.
3035   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3036 
3037   Note: This requires a sequential iscol with all indices.
3038 */
3039 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3040 {
3041   PetscErrorCode ierr;
3042   PetscMPIInt    rank,size;
3043   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3044   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3045   PetscBool      allcolumns, colflag;
3046   Mat            M,Mreuse;
3047   MatScalar      *vwork,*aa;
3048   MPI_Comm       comm;
3049   Mat_SeqAIJ     *aij;
3050 
3051   PetscFunctionBegin;
3052   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3053   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3054   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3055 
3056   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3057   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3058   if (colflag && ncol == mat->cmap->N) {
3059     allcolumns = PETSC_TRUE;
3060     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3061   } else {
3062     allcolumns = PETSC_FALSE;
3063   }
3064   if (call ==  MAT_REUSE_MATRIX) {
3065     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3066     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3067     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3068   } else {
3069     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3070   }
3071 
3072   /*
3073       m - number of local rows
3074       n - number of columns (same on all processors)
3075       rstart - first row in new global matrix generated
3076   */
3077   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3078   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3079   if (call == MAT_INITIAL_MATRIX) {
3080     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3081     ii  = aij->i;
3082     jj  = aij->j;
3083 
3084     /*
3085         Determine the number of non-zeros in the diagonal and off-diagonal
3086         portions of the matrix in order to do correct preallocation
3087     */
3088 
3089     /* first get start and end of "diagonal" columns */
3090     if (csize == PETSC_DECIDE) {
3091       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3092       if (mglobal == n) { /* square matrix */
3093         nlocal = m;
3094       } else {
3095         nlocal = n/size + ((n % size) > rank);
3096       }
3097     } else {
3098       nlocal = csize;
3099     }
3100     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3101     rstart = rend - nlocal;
3102     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3103 
3104     /* next, compute all the lengths */
3105     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3106     olens = dlens + m;
3107     for (i=0; i<m; i++) {
3108       jend = ii[i+1] - ii[i];
3109       olen = 0;
3110       dlen = 0;
3111       for (j=0; j<jend; j++) {
3112         if (*jj < rstart || *jj >= rend) olen++;
3113         else dlen++;
3114         jj++;
3115       }
3116       olens[i] = olen;
3117       dlens[i] = dlen;
3118     }
3119     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3120     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3121     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3122     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3123     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3124     ierr = PetscFree(dlens);CHKERRQ(ierr);
3125   } else {
3126     PetscInt ml,nl;
3127 
3128     M    = *newmat;
3129     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3130     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3131     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3132     /*
3133          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3134        rather than the slower MatSetValues().
3135     */
3136     M->was_assembled = PETSC_TRUE;
3137     M->assembled     = PETSC_FALSE;
3138   }
3139   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3140   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3141   ii   = aij->i;
3142   jj   = aij->j;
3143   aa   = aij->a;
3144   for (i=0; i<m; i++) {
3145     row   = rstart + i;
3146     nz    = ii[i+1] - ii[i];
3147     cwork = jj;     jj += nz;
3148     vwork = aa;     aa += nz;
3149     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3150   }
3151 
3152   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154   *newmat = M;
3155 
3156   /* save submatrix used in processor for next request */
3157   if (call ==  MAT_INITIAL_MATRIX) {
3158     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3159     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3160   }
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3166 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3167 {
3168   PetscInt       m,cstart, cend,j,nnz,i,d;
3169   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3170   const PetscInt *JJ;
3171   PetscScalar    *values;
3172   PetscErrorCode ierr;
3173 
3174   PetscFunctionBegin;
3175   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3176 
3177   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3178   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3179   m      = B->rmap->n;
3180   cstart = B->cmap->rstart;
3181   cend   = B->cmap->rend;
3182   rstart = B->rmap->rstart;
3183 
3184   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3185 
3186 #if defined(PETSC_USE_DEBUG)
3187   for (i=0; i<m; i++) {
3188     nnz = Ii[i+1]- Ii[i];
3189     JJ  = J + Ii[i];
3190     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3191     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3192     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3193   }
3194 #endif
3195 
3196   for (i=0; i<m; i++) {
3197     nnz     = Ii[i+1]- Ii[i];
3198     JJ      = J + Ii[i];
3199     nnz_max = PetscMax(nnz_max,nnz);
3200     d       = 0;
3201     for (j=0; j<nnz; j++) {
3202       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3203     }
3204     d_nnz[i] = d;
3205     o_nnz[i] = nnz - d;
3206   }
3207   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3208   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3209 
3210   if (v) values = (PetscScalar*)v;
3211   else {
3212     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3213   }
3214 
3215   for (i=0; i<m; i++) {
3216     ii   = i + rstart;
3217     nnz  = Ii[i+1]- Ii[i];
3218     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3219   }
3220   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3221   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3222 
3223   if (!v) {
3224     ierr = PetscFree(values);CHKERRQ(ierr);
3225   }
3226   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3232 /*@
3233    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3234    (the default parallel PETSc format).
3235 
3236    Collective on MPI_Comm
3237 
3238    Input Parameters:
3239 +  B - the matrix
3240 .  i - the indices into j for the start of each local row (starts with zero)
3241 .  j - the column indices for each local row (starts with zero)
3242 -  v - optional values in the matrix
3243 
3244    Level: developer
3245 
3246    Notes:
3247        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3248      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3249      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3250 
3251        The i and j indices are 0 based; the i indices are offsets into the local j array.
3252 
3253        The format used for the sparse matrix input is equivalent to a
3254     row-major ordering, i.e., for the following matrix the expected input data is
3255     as shown
3256 
3257 $        1 0 0
3258 $        2 0 3     P0
3259 $       -------
3260 $        4 5 6     P1
3261 $
3262 $     Process0 [P0]: rows_owned=[0,1]
3263 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3264 $        j =  {0,0,2}  [size = 3]
3265 $        v =  {1,2,3}  [size = 3]
3266 $
3267 $     Process1 [P1]: rows_owned=[2]
3268 $        i =  {0,3}    [size = nrow+1  = 1+1]
3269 $        j =  {0,1,2}  [size = 3]
3270 $        v =  {4,5,6}  [size = 3]
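
   As a minimal sketch (not taken from the PETSc examples), process 0 above could build
   its part of this matrix as follows; the sizes and arrays shown are those of P0, and
   every process makes the same calls with its own local data:

.vb
      Mat         B;
      PetscInt    i[] = {0,1,3};            // row offsets for the 2 local rows of P0
      PetscInt    j[] = {0,0,2};            // local column indices
      PetscScalar v[] = {1,2,3};            // local values

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);    // 2 local rows on P0, 3x3 global
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve

   No further assembly calls are needed; this routine assembles the matrix.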
3271 
3272 .keywords: matrix, aij, compressed row, sparse, parallel
3273 
3274 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3275           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3276 @*/
3277 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3278 {
3279   PetscErrorCode ierr;
3280 
3281   PetscFunctionBegin;
3282   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 #undef __FUNCT__
3287 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3288 /*@C
3289    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3290    (the default parallel PETSc format).  For good matrix assembly performance
3291    the user should preallocate the matrix storage by setting the parameters
3292    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3293    performance can be increased by more than a factor of 50.
3294 
3295    Collective on MPI_Comm
3296 
3297    Input Parameters:
3298 +  B - the matrix
3299 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3300            (same value is used for all local rows)
3301 .  d_nnz - array containing the number of nonzeros in the various rows of the
3302            DIAGONAL portion of the local submatrix (possibly different for each row)
3303            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3304            The size of this array is equal to the number of local rows, i.e 'm'.
3305            For matrices that will be factored, you must leave room for (and set)
3306            the diagonal entry even if it is zero.
3307 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3308            submatrix (same value is used for all local rows).
3309 -  o_nnz - array containing the number of nonzeros in the various rows of the
3310            OFF-DIAGONAL portion of the local submatrix (possibly different for
3311            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3312            structure. The size of this array is equal to the number
3313            of local rows, i.e 'm'.
3314 
3315    If the *_nnz parameter is given then the *_nz parameter is ignored
3316 
3317    The AIJ format (also called the Yale sparse matrix format or
3318    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3319    storage.  The stored row and column indices begin with zero.
3320    See Users-Manual: ch_mat for details.
3321 
3322    The parallel matrix is partitioned such that the first m0 rows belong to
3323    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3324    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3325 
3326    The DIAGONAL portion of the local submatrix of a processor can be defined
3327    as the submatrix which is obtained by extracting the part corresponding to
3328    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3329    first row that belongs to the processor, r2 is the last row belonging to
3330    this processor, and c1-c2 is the range of indices of the local part of a
3331    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3332    common case of a square matrix, the row and column ranges are the same and
3333    the DIAGONAL part is also square. The remaining portion of the local
3334    submatrix (m x N) constitutes the OFF-DIAGONAL portion.
3335 
3336    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3337 
3338    You can call MatGetInfo() to get information on how effective the preallocation was;
3339    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3340    You can also run with the option -info and look for messages with the string
3341    malloc in them to see if additional memory allocation was needed.
3342 
3343    Example usage:
3344 
3345    Consider the following 8x8 matrix with 34 non-zero values, that is
3346    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3347    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3348    as follows:
3349 
3350 .vb
3351             1  2  0  |  0  3  0  |  0  4
3352     Proc0   0  5  6  |  7  0  0  |  8  0
3353             9  0 10  | 11  0  0  | 12  0
3354     -------------------------------------
3355            13  0 14  | 15 16 17  |  0  0
3356     Proc1   0 18  0  | 19 20 21  |  0  0
3357             0  0  0  | 22 23  0  | 24  0
3358     -------------------------------------
3359     Proc2  25 26 27  |  0  0 28  | 29  0
3360            30  0  0  | 31 32 33  |  0 34
3361 .ve
3362 
3363    This can be represented as a collection of submatrices as:
3364 
3365 .vb
3366       A B C
3367       D E F
3368       G H I
3369 .ve
3370 
3371    Where the submatrices A,B,C are owned by proc0, D,E,F are
3372    owned by proc1, G,H,I are owned by proc2.
3373 
3374    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3375    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3376    The 'M','N' parameters are 8,8, and have the same values on all procs.
3377 
3378    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3379    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3380    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3381    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3382    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3383    matrix, and [DF] as another SeqAIJ matrix.
3384 
3385    When d_nz, o_nz parameters are specified, d_nz storage elements are
3386    allocated for every row of the local diagonal submatrix, and o_nz
3387    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3388    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3389    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3390    In this case, the values of d_nz,o_nz are:
3391 .vb
3392      proc0 : dnz = 2, o_nz = 2
3393      proc1 : dnz = 3, o_nz = 2
3394      proc2 : dnz = 1, o_nz = 4
3395 .ve
3396    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3397    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3398    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3399    34 values.
3400 
3401    When d_nnz, o_nnz parameters are specified, the storage is specified
3402    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3403    In the above case the values for d_nnz,o_nnz are:
3404 .vb
3405      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3406      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3407      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3408 .ve
3409    Here the space allocated is the sum of all the above values, i.e., 34, and
3410    hence pre-allocation is perfect.
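
   As a hedged sketch (not taken from the PETSc examples), proc0 of the example above
   could preallocate with either the per-row arrays or the single-value form; the matrix
   A is assumed to have already been created, sized, and given type MATMPIAIJ:

.vb
      PetscInt d_nnz[] = {2,2,2};           // nonzeros per local row of the DIAGONAL part [A]
      PetscInt o_nnz[] = {2,2,2};           // nonzeros per local row of the OFF-DIAGONAL part [BC]

      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);    // exact, per-row preallocation
      // or, with a single (over)estimate for every local row:
      // MatMPIAIJSetPreallocation(A,2,NULL,2,NULL);
.ve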
3411 
3412    Level: intermediate
3413 
3414 .keywords: matrix, aij, compressed row, sparse, parallel
3415 
3416 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3417           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3418 @*/
3419 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3420 {
3421   PetscErrorCode ierr;
3422 
3423   PetscFunctionBegin;
3424   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3425   PetscValidType(B,1);
3426   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 #undef __FUNCT__
3431 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3432 /*@
3433      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3434          in standard CSR format.
3435 
3436    Collective on MPI_Comm
3437 
3438    Input Parameters:
3439 +  comm - MPI communicator
3440 .  m - number of local rows (Cannot be PETSC_DECIDE)
3441 .  n - This value should be the same as the local size used in creating the
3442        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3443        calculated if N is given) For square matrices n is almost always m.
3444 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3445 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3446 .   i - row indices
3447 .   j - column indices
3448 -   a - matrix values
3449 
3450    Output Parameter:
3451 .   mat - the matrix
3452 
3453    Level: intermediate
3454 
3455    Notes:
3456        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3457      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3458      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3459 
3460        The i and j indices are 0 based; the i indices are offsets into the local j array.
3461 
3462        The format used for the sparse matrix input is equivalent to a
3463     row-major ordering, i.e., for the following matrix the expected input data is
3464     as shown
3465 
3466 $        1 0 0
3467 $        2 0 3     P0
3468 $       -------
3469 $        4 5 6     P1
3470 $
3471 $     Process0 [P0]: rows_owned=[0,1]
3472 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3473 $        j =  {0,0,2}  [size = 3]
3474 $        v =  {1,2,3}  [size = 3]
3475 $
3476 $     Process1 [P1]: rows_owned=[2]
3477 $        i =  {0,3}    [size = nrow+1  = 1+1]
3478 $        j =  {0,1,2}  [size = 3]
3479 $        v =  {4,5,6}  [size = 3]
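
   As a minimal sketch (not taken from the PETSc examples), process 0 above could create
   the matrix in one call; every process passes its own local row count and CSR arrays:

.vb
      Mat         A;
      PetscInt    i[] = {0,1,3};            // row offsets for the 2 local rows of P0
      PetscInt    j[] = {0,0,2};            // local column indices
      PetscScalar v[] = {1,2,3};            // local values

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve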
3480 
3481 .keywords: matrix, aij, compressed row, sparse, parallel
3482 
3483 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3484           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3485 @*/
3486 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3487 {
3488   PetscErrorCode ierr;
3489 
3490   PetscFunctionBegin;
3491   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3492   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3493   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3494   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3495   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3496   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3497   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatCreateAIJ"
3503 /*@C
3504    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3505    (the default parallel PETSc format).  For good matrix assembly performance
3506    the user should preallocate the matrix storage by setting the parameters
3507    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3508    performance can be increased by more than a factor of 50.
3509 
3510    Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +  comm - MPI communicator
3514 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3515            This value should be the same as the local size used in creating the
3516            y vector for the matrix-vector product y = Ax.
3517 .  n - This value should be the same as the local size used in creating the
3518        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3519        calculated if N is given) For square matrices n is almost always m.
3520 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3521 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3522 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3523            (same value is used for all local rows)
3524 .  d_nnz - array containing the number of nonzeros in the various rows of the
3525            DIAGONAL portion of the local submatrix (possibly different for each row)
3526            or NULL, if d_nz is used to specify the nonzero structure.
3527            The size of this array is equal to the number of local rows, i.e 'm'.
3528 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3529            submatrix (same value is used for all local rows).
3530 -  o_nnz - array containing the number of nonzeros in the various rows of the
3531            OFF-DIAGONAL portion of the local submatrix (possibly different for
3532            each row) or NULL, if o_nz is used to specify the nonzero
3533            structure. The size of this array is equal to the number
3534            of local rows, i.e 'm'.
3535 
3536    Output Parameter:
3537 .  A - the matrix
3538 
3539    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3540    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3541    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3542 
3543    Notes:
3544    If the *_nnz parameter is given then the *_nz parameter is ignored
3545 
3546    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3547    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3548    storage requirements for this matrix.
3549 
3550    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3551    processor then it must be used on all processors that share the object for
3552    that argument.
3553 
3554    The user MUST specify either the local or global matrix dimensions
3555    (possibly both).
3556 
3557    The parallel matrix is partitioned across processors such that the
3558    first m0 rows belong to process 0, the next m1 rows belong to
3559    process 1, the next m2 rows belong to process 2, etc., where
3560    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3561    values corresponding to an [m x N] submatrix.
3562 
3563    The columns are logically partitioned with the n0 columns belonging
3564    to the 0th partition, the next n1 columns belonging to the next
3565    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3566 
3567    The DIAGONAL portion of the local submatrix on any given processor
3568    is the submatrix corresponding to the rows and columns m,n
3569    owned by the given processor, i.e., the diagonal matrix on
3570    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3571    etc. The remaining portion of the local submatrix [m x (N-n)]
3572    constitutes the OFF-DIAGONAL portion. The example below better
3573    illustrates this concept.
3574 
3575    For a square global matrix we define each processor's diagonal portion
3576    to be its local rows and the corresponding columns (a square submatrix);
3577    each processor's off-diagonal portion encompasses the remainder of the
3578    local matrix (a rectangular submatrix).
3579 
3580    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3581 
3582    When calling this routine with a single process communicator, a matrix of
3583    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3584    type of communicator, use the construction mechanism:
3585      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3586 
3587    By default, this format uses inodes (identical nodes) when possible.
3588    We search for consecutive rows with the same nonzero structure, thereby
3589    reusing matrix information to achieve increased efficiency.
3590 
3591    Options Database Keys:
3592 +  -mat_no_inode  - Do not use inodes
3593 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3594 -  -mat_aij_oneindex - Internally use indexing starting at 1
3595         rather than 0.  Note that when calling MatSetValues(),
3596         the user still MUST index entries starting at 0!
3597 
3598 
3599    Example usage:
3600 
3601    Consider the following 8x8 matrix with 34 non-zero values, that is
3602    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3603    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3604    as follows:
3605 
3606 .vb
3607             1  2  0  |  0  3  0  |  0  4
3608     Proc0   0  5  6  |  7  0  0  |  8  0
3609             9  0 10  | 11  0  0  | 12  0
3610     -------------------------------------
3611            13  0 14  | 15 16 17  |  0  0
3612     Proc1   0 18  0  | 19 20 21  |  0  0
3613             0  0  0  | 22 23  0  | 24  0
3614     -------------------------------------
3615     Proc2  25 26 27  |  0  0 28  | 29  0
3616            30  0  0  | 31 32 33  |  0 34
3617 .ve
3618 
3619    This can be represented as a collection of submatrices as:
3620 
3621 .vb
3622       A B C
3623       D E F
3624       G H I
3625 .ve
3626 
3627    Where the submatrices A,B,C are owned by proc0, D,E,F are
3628    owned by proc1, G,H,I are owned by proc2.
3629 
3630    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3631    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3632    The 'M','N' parameters are 8,8, and have the same values on all procs.
3633 
3634    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3635    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3636    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3637    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3638    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3639    matrix, and [DF] as another SeqAIJ matrix.
3640 
3641    When d_nz, o_nz parameters are specified, d_nz storage elements are
3642    allocated for every row of the local diagonal submatrix, and o_nz
3643    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3644    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3645    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3646    In this case, the values of d_nz,o_nz are:
3647 .vb
3648      proc0 : dnz = 2, o_nz = 2
3649      proc1 : dnz = 3, o_nz = 2
3650      proc2 : dnz = 1, o_nz = 4
3651 .ve
3652    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3653    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3654    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3655    34 values.
3656 
3657    When d_nnz, o_nnz parameters are specified, the storage is specified
3658    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3659    In the above case the values for d_nnz,o_nnz are:
3660 .vb
3661      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3662      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3663      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3664 .ve
3665    Here the space allocated is the sum of all the above values, i.e., 34, and
3666    hence pre-allocation is perfect.
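
   As a hedged sketch (not taken from the PETSc examples), proc0 of the example above
   could create and preallocate the matrix with either the per-row arrays or the
   single-value form; each process passes its own local sizes and nonzero counts:

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2};           // nonzeros per local row of the DIAGONAL part [A]
      PetscInt o_nnz[] = {2,2,2};           // nonzeros per local row of the OFF-DIAGONAL part [BC]

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
      // or, with a single (over)estimate for every local row:
      // MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);
.ve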
3667 
3668    Level: intermediate
3669 
3670 .keywords: matrix, aij, compressed row, sparse, parallel
3671 
3672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3673           MPIAIJ, MatCreateMPIAIJWithArrays()
3674 @*/
3675 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3676 {
3677   PetscErrorCode ierr;
3678   PetscMPIInt    size;
3679 
3680   PetscFunctionBegin;
3681   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3682   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3683   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3684   if (size > 1) {
3685     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3686     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3687   } else {
3688     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3689     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3690   }
3691   PetscFunctionReturn(0);
3692 }
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3696 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3697 {
3698   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3699 
3700   PetscFunctionBegin;
3701   if (Ad)     *Ad     = a->A;
3702   if (Ao)     *Ao     = a->B;
3703   if (colmap) *colmap = a->garray;
3704   PetscFunctionReturn(0);
3705 }
3706 
3707 #undef __FUNCT__
3708 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3709 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3710 {
3711   PetscErrorCode ierr;
3712   PetscInt       i;
3713   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3714 
3715   PetscFunctionBegin;
3716   if (coloring->ctype == IS_COLORING_GLOBAL) {
3717     ISColoringValue *allcolors,*colors;
3718     ISColoring      ocoloring;
3719 
3720     /* set coloring for diagonal portion */
3721     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3722 
3723     /* set coloring for off-diagonal portion */
3724     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3725     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3726     for (i=0; i<a->B->cmap->n; i++) {
3727       colors[i] = allcolors[a->garray[i]];
3728     }
3729     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3730     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3731     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3732     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3733   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3734     ISColoringValue *colors;
3735     PetscInt        *larray;
3736     ISColoring      ocoloring;
3737 
3738     /* set coloring for diagonal portion */
3739     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3740     for (i=0; i<a->A->cmap->n; i++) {
3741       larray[i] = i + A->cmap->rstart;
3742     }
3743     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3744     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3745     for (i=0; i<a->A->cmap->n; i++) {
3746       colors[i] = coloring->colors[larray[i]];
3747     }
3748     ierr = PetscFree(larray);CHKERRQ(ierr);
3749     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3750     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3751     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3752 
3753     /* set coloring for off-diagonal portion */
3754     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3755     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3756     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3757     for (i=0; i<a->B->cmap->n; i++) {
3758       colors[i] = coloring->colors[larray[i]];
3759     }
3760     ierr = PetscFree(larray);CHKERRQ(ierr);
3761     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3762     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3763     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3764   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3770 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3771 {
3772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3773   PetscErrorCode ierr;
3774 
3775   PetscFunctionBegin;
3776   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3777   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3778   PetscFunctionReturn(0);
3779 }
3780 
3781 #undef __FUNCT__
3782 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3783 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3784 {
3785   PetscErrorCode ierr;
3786   PetscInt       m,N,i,rstart,nnz,Ii;
3787   PetscInt       *indx;
3788   PetscScalar    *values;
3789 
3790   PetscFunctionBegin;
3791   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3792   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3793     PetscInt       *dnz,*onz,sum,bs,cbs;
3794 
3795     if (n == PETSC_DECIDE) {
3796       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3797     }
3798     /* Check sum(n) = N */
3799     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3800     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3801 
3802     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3803     rstart -= m;
3804 
3805     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3806     for (i=0; i<m; i++) {
3807       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3808       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3809       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3810     }
3811 
3812     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3813     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3814     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3815     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3816     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3817     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3818     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3819   }
3820 
3821   /* numeric phase */
3822   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3823   for (i=0; i<m; i++) {
3824     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3825     Ii   = i + rstart;
3826     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3827     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3828   }
3829   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 #undef __FUNCT__
3835 #define __FUNCT__ "MatFileSplit"
3836 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3837 {
3838   PetscErrorCode    ierr;
3839   PetscMPIInt       rank;
3840   PetscInt          m,N,i,rstart,nnz;
3841   size_t            len;
3842   const PetscInt    *indx;
3843   PetscViewer       out;
3844   char              *name;
3845   Mat               B;
3846   const PetscScalar *values;
3847 
3848   PetscFunctionBegin;
3849   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3850   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3851   /* Should this be the type of the diagonal block of A? */
3852   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3853   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3854   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3855   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3856   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3857   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3858   for (i=0; i<m; i++) {
3859     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3860     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3861     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3862   }
3863   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865 
3866   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3867   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3868   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3869   sprintf(name,"%s.%d",outfile,rank);
3870   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3871   ierr = PetscFree(name);CHKERRQ(ierr);
3872   ierr = MatView(B,out);CHKERRQ(ierr);
3873   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3874   ierr = MatDestroy(&B);CHKERRQ(ierr);
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3879 #undef __FUNCT__
3880 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3881 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3882 {
3883   PetscErrorCode      ierr;
3884   Mat_Merge_SeqsToMPI *merge;
3885   PetscContainer      container;
3886 
3887   PetscFunctionBegin;
3888   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3889   if (container) {
3890     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3891     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3892     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3893     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3903     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3904     ierr = PetscFree(merge);CHKERRQ(ierr);
3905     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3906   }
3907   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 #include <../src/mat/utils/freespace.h>
3912 #include <petscbt.h>
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3916 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3917 {
3918   PetscErrorCode      ierr;
3919   MPI_Comm            comm;
3920   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3921   PetscMPIInt         size,rank,taga,*len_s;
3922   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3923   PetscInt            proc,m;
3924   PetscInt            **buf_ri,**buf_rj;
3925   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3926   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3927   MPI_Request         *s_waits,*r_waits;
3928   MPI_Status          *status;
3929   MatScalar           *aa=a->a;
3930   MatScalar           **abuf_r,*ba_i;
3931   Mat_Merge_SeqsToMPI *merge;
3932   PetscContainer      container;
3933 
3934   PetscFunctionBegin;
3935   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3936   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3937 
3938   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3939   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3940 
3941   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3942   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3943 
3944   bi     = merge->bi;
3945   bj     = merge->bj;
3946   buf_ri = merge->buf_ri;
3947   buf_rj = merge->buf_rj;
3948 
3949   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3950   owners = merge->rowmap->range;
3951   len_s  = merge->len_s;
3952 
3953   /* send and recv matrix values */
3954   /*-----------------------------*/
3955   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3956   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3957 
3958   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3959   for (proc=0,k=0; proc<size; proc++) {
3960     if (!len_s[proc]) continue;
3961     i    = owners[proc];
3962     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3963     k++;
3964   }
3965 
3966   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3967   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3968   ierr = PetscFree(status);CHKERRQ(ierr);
3969 
3970   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3971   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3972 
3973   /* insert mat values of mpimat */
3974   /*----------------------------*/
3975   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3976   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3977 
3978   for (k=0; k<merge->nrecv; k++) {
3979     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3980     nrows       = *(buf_ri_k[k]);
3981     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3982     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
3983   }
3984 
3985   /* set values of ba */
3986   m = merge->rowmap->n;
3987   for (i=0; i<m; i++) {
3988     arow = owners[rank] + i;
3989     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3990     bnzi = bi[i+1] - bi[i];
3991     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3992 
3993     /* add local non-zero vals of this proc's seqmat into ba */
3994     anzi   = ai[arow+1] - ai[arow];
3995     aj     = a->j + ai[arow];
3996     aa     = a->a + ai[arow];
3997     nextaj = 0;
3998     for (j=0; nextaj<anzi; j++) {
3999       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4000         ba_i[j] += aa[nextaj++];
4001       }
4002     }
4003 
4004     /* add received vals into ba */
4005     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4006       /* i-th row */
4007       if (i == *nextrow[k]) {
4008         anzi   = *(nextai[k]+1) - *nextai[k];
4009         aj     = buf_rj[k] + *(nextai[k]);
4010         aa     = abuf_r[k] + *(nextai[k]);
4011         nextaj = 0;
4012         for (j=0; nextaj<anzi; j++) {
4013           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4014             ba_i[j] += aa[nextaj++];
4015           }
4016         }
4017         nextrow[k]++; nextai[k]++;
4018       }
4019     }
4020     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4021   }
4022   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4023   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4024 
4025   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4026   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4027   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4028   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4029   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4030   PetscFunctionReturn(0);
4031 }
4032 
4033 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4034 
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4037 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4038 {
4039   PetscErrorCode      ierr;
4040   Mat                 B_mpi;
4041   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4042   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4043   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4044   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4045   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4046   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4047   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4048   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4049   MPI_Status          *status;
4050   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4051   PetscBT             lnkbt;
4052   Mat_Merge_SeqsToMPI *merge;
4053   PetscContainer      container;
4054 
4055   PetscFunctionBegin;
4056   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4057 
4058   /* make sure it is a PETSc comm */
4059   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4062 
4063   ierr = PetscNew(&merge);CHKERRQ(ierr);
4064   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4065 
4066   /* determine row ownership */
4067   /*---------------------------------------------------------*/
4068   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4069   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4070   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4071   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4072   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4073   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4074   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4075 
4076   m      = merge->rowmap->n;
4077   owners = merge->rowmap->range;
4078 
4079   /* determine the number of messages to send, their lengths */
4080   /*---------------------------------------------------------*/
4081   len_s = merge->len_s;
4082 
4083   len          = 0; /* length of buf_si[] */
4084   merge->nsend = 0;
4085   for (proc=0; proc<size; proc++) {
4086     len_si[proc] = 0;
4087     if (proc == rank) {
4088       len_s[proc] = 0;
4089     } else {
4090       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4091       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4092     }
4093     if (len_s[proc]) {
4094       merge->nsend++;
4095       nrows = 0;
4096       for (i=owners[proc]; i<owners[proc+1]; i++) {
4097         if (ai[i+1] > ai[i]) nrows++;
4098       }
4099       len_si[proc] = 2*(nrows+1);
4100       len         += len_si[proc];
4101     }
4102   }
4103 
4104   /* determine the number and length of messages to receive for ij-structure */
4105   /*-------------------------------------------------------------------------*/
4106   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4107   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4108 
4109   /* post the Irecv of j-structure */
4110   /*-------------------------------*/
4111   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4112   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4113 
4114   /* post the Isend of j-structure */
4115   /*--------------------------------*/
4116   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4117 
4118   for (proc=0, k=0; proc<size; proc++) {
4119     if (!len_s[proc]) continue;
4120     i    = owners[proc];
4121     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4122     k++;
4123   }
4124 
4125   /* receives and sends of j-structure are complete */
4126   /*------------------------------------------------*/
4127   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4128   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4129 
4130   /* send and recv i-structure */
4131   /*---------------------------*/
4132   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4133   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4134 
4135   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4136   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4137   for (proc=0,k=0; proc<size; proc++) {
4138     if (!len_s[proc]) continue;
4139     /* form outgoing message for i-structure:
4140          buf_si[0]:                 nrows to be sent
4141                [1:nrows]:           row index (local to the receiving process)
4142                [nrows+1:2*nrows+1]: i-structure index
4143     */
4144     /*-------------------------------------------*/
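    /* Illustrative example (hypothetical data): if two nonzero rows with destination-local
       indices 3 and 7, holding 2 and 5 nonzeros respectively, are sent to [proc], the message is
           buf_si = {2, 3, 7, 0, 2, 7}
       i.e. nrows, then the two local row indices, then the running i-structure offsets. */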
4145     nrows       = len_si[proc]/2 - 1;
4146     buf_si_i    = buf_si + nrows+1;
4147     buf_si[0]   = nrows;
4148     buf_si_i[0] = 0;
4149     nrows       = 0;
4150     for (i=owners[proc]; i<owners[proc+1]; i++) {
4151       anzi = ai[i+1] - ai[i];
4152       if (anzi) {
4153         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4154         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4155         nrows++;
4156       }
4157     }
4158     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4159     k++;
4160     buf_si += len_si[proc];
4161   }
4162 
4163   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4164   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4165 
4166   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4167   for (i=0; i<merge->nrecv; i++) {
4168     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4169   }
4170 
4171   ierr = PetscFree(len_si);CHKERRQ(ierr);
4172   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4173   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4174   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4175   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4176   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4177   ierr = PetscFree(status);CHKERRQ(ierr);
4178 
4179   /* compute a local seq matrix in each processor */
4180   /*----------------------------------------------*/
4181   /* allocate bi array and free space for accumulating nonzero column info */
4182   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4183   bi[0] = 0;
4184 
4185   /* create and initialize a linked list */
4186   nlnk = N+1;
4187   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4188 
4189   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4190   len  = ai[owners[rank+1]] - ai[owners[rank]];
4191   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4192 
4193   current_space = free_space;
4194 
4195   /* determine symbolic info for each local row */
4196   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4197 
4198   for (k=0; k<merge->nrecv; k++) {
4199     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4200     nrows       = *buf_ri_k[k];
4201     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4202     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4203   }
4204 
4205   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4206   len  = 0;
4207   for (i=0; i<m; i++) {
4208     bnzi = 0;
4209     /* add local non-zero cols of this proc's seqmat into lnk */
4210     arow  = owners[rank] + i;
4211     anzi  = ai[arow+1] - ai[arow];
4212     aj    = a->j + ai[arow];
4213     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4214     bnzi += nlnk;
4215     /* add received col data into lnk */
4216     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4217       if (i == *nextrow[k]) { /* i-th row */
4218         anzi  = *(nextai[k]+1) - *nextai[k];
4219         aj    = buf_rj[k] + *nextai[k];
4220         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4221         bnzi += nlnk;
4222         nextrow[k]++; nextai[k]++;
4223       }
4224     }
4225     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4226 
4227     /* if free space is not available, make more free space */
4228     if (current_space->local_remaining<bnzi) {
4229       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4230       nspacedouble++;
4231     }
4232     /* copy data into free space, then initialize lnk */
4233     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4234     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4235 
4236     current_space->array           += bnzi;
4237     current_space->local_used      += bnzi;
4238     current_space->local_remaining -= bnzi;
4239 
4240     bi[i+1] = bi[i] + bnzi;
4241   }
4242 
4243   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4244 
4245   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4246   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4247   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4248 
4249   /* create symbolic parallel matrix B_mpi */
4250   /*---------------------------------------*/
4251   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4252   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4253   if (n==PETSC_DECIDE) {
4254     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4255   } else {
4256     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4257   }
4258   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4259   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4260   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4261   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4262   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4263 
4264   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4265   B_mpi->assembled    = PETSC_FALSE;
4266   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4267   merge->bi           = bi;
4268   merge->bj           = bj;
4269   merge->buf_ri       = buf_ri;
4270   merge->buf_rj       = buf_rj;
4271   merge->coi          = NULL;
4272   merge->coj          = NULL;
4273   merge->owners_co    = NULL;
4274 
4275   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4276 
4277   /* attach the supporting struct to B_mpi for reuse */
4278   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4279   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4280   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4281   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4282   *mpimat = B_mpi;
4283 
4284   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4285   PetscFunctionReturn(0);
4286 }
4287 
4288 #undef __FUNCT__
4289 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4290 /*@C
4291       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4292                  matrices from each processor
4293 
4294     Collective on MPI_Comm
4295 
4296    Input Parameters:
4297 +    comm - the communicator the parallel matrix will live on
4298 .    seqmat - the input sequential matrix
4299 .    m - number of local rows (or PETSC_DECIDE)
4300 .    n - number of local columns (or PETSC_DECIDE)
4301 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4302 
4303    Output Parameter:
4304 .    mpimat - the parallel matrix generated
4305 
4306     Level: advanced
4307 
4308    Notes:
4309      The dimensions of the sequential matrix in each processor MUST be the same.
4310      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4311      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
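
   Example usage (a minimal sketch; it assumes every process has already assembled its own SeqAIJ
   matrix seqmat of identical dimensions):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve
   and, after the numerical values (but not the nonzero pattern) of seqmat have changed,
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve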
4312 @*/
4313 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4314 {
4315   PetscErrorCode ierr;
4316   PetscMPIInt    size;
4317 
4318   PetscFunctionBegin;
4319   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4320   if (size == 1) {
4321     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4322     if (scall == MAT_INITIAL_MATRIX) {
4323       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4324     } else {
4325       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4326     }
4327     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4328     PetscFunctionReturn(0);
4329   }
4330   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4331   if (scall == MAT_INITIAL_MATRIX) {
4332     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4333   }
4334   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4335   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4336   PetscFunctionReturn(0);
4337 }
4338 
4339 #undef __FUNCT__
4340 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4341 /*@
4342      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4343           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4344           with MatGetSize().
4345 
4346     Not Collective
4347 
4348    Input Parameters:
4349 +    A - the matrix
4350 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4351 
4352    Output Parameter:
4353 .    A_loc - the local sequential matrix generated
4354 
4355     Level: developer
4356 
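   Example usage (a minimal sketch; A is an assembled MATMPIAIJ matrix and ierr/CHKERRQ follow the usual error-checking pattern):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
   After the numerical values of A change, the copy can be refreshed in place with MAT_REUSE_MATRIX,
   and it is eventually released with MatDestroy(&A_loc).
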
4357 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4358 
4359 @*/
4360 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4361 {
4362   PetscErrorCode ierr;
4363   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4364   Mat_SeqAIJ     *mat,*a,*b;
4365   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4366   MatScalar      *aa,*ba,*cam;
4367   PetscScalar    *ca;
4368   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4369   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4370   PetscBool      match;
4371   MPI_Comm       comm;
4372   PetscMPIInt    size;
4373 
4374   PetscFunctionBegin;
4375   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4376   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4377   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4378   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4379   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4380 
4381   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4382   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4383   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4384   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4385   aa = a->a; ba = b->a;
4386   if (scall == MAT_INITIAL_MATRIX) {
4387     if (size == 1) {
4388       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4389       PetscFunctionReturn(0);
4390     }
4391 
4392     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4393     ci[0] = 0;
4394     for (i=0; i<am; i++) {
4395       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4396     }
4397     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4398     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4399     k    = 0;
4400     for (i=0; i<am; i++) {
4401       ncols_o = bi[i+1] - bi[i];
4402       ncols_d = ai[i+1] - ai[i];
4403       /* off-diagonal portion of A */
4404       for (jo=0; jo<ncols_o; jo++) {
4405         col = cmap[*bj];
4406         if (col >= cstart) break;
4407         cj[k]   = col; bj++;
4408         ca[k++] = *ba++;
4409       }
4410       /* diagonal portion of A */
4411       for (j=0; j<ncols_d; j++) {
4412         cj[k]   = cstart + *aj++;
4413         ca[k++] = *aa++;
4414       }
4415       /* off-diagonal portion of A */
4416       for (j=jo; j<ncols_o; j++) {
4417         cj[k]   = cmap[*bj++];
4418         ca[k++] = *ba++;
4419       }
4420     }
4421     /* put together the new matrix */
4422     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4423     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4424     /* Since these are PETSc arrays, change flags to free them as necessary. */
4425     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4426     mat->free_a  = PETSC_TRUE;
4427     mat->free_ij = PETSC_TRUE;
4428     mat->nonew   = 0;
4429   } else if (scall == MAT_REUSE_MATRIX) {
4430     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4431     ci = mat->i; cj = mat->j; cam = mat->a;
4432     for (i=0; i<am; i++) {
4433       /* off-diagonal portion of A */
4434       ncols_o = bi[i+1] - bi[i];
4435       for (jo=0; jo<ncols_o; jo++) {
4436         col = cmap[*bj];
4437         if (col >= cstart) break;
4438         *cam++ = *ba++; bj++;
4439       }
4440       /* diagonal portion of A */
4441       ncols_d = ai[i+1] - ai[i];
4442       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4443       /* off-diagonal portion of A */
4444       for (j=jo; j<ncols_o; j++) {
4445         *cam++ = *ba++; bj++;
4446       }
4447     }
4448   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4449   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4455 /*@C
4456      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4457 
4458     Not Collective
4459 
4460    Input Parameters:
4461 +    A - the matrix
4462 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4463 -    row, col - index sets of rows and columns to extract (or NULL)
4464 
4465    Output Parameter:
4466 .    A_loc - the local sequential matrix generated
4467 
4468     Level: developer
4469 
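   Example usage (a minimal sketch; passing NULL for row and col selects all local rows and all
   nonzero columns of the MATMPIAIJ matrix A):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
   with MatDestroy(&A_loc) called once the condensed local matrix is no longer needed.
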
4470 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4471 
4472 @*/
4473 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4474 {
4475   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4476   PetscErrorCode ierr;
4477   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4478   IS             isrowa,iscola;
4479   Mat            *aloc;
4480   PetscBool      match;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4484   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4485   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4486   if (!row) {
4487     start = A->rmap->rstart; end = A->rmap->rend;
4488     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4489   } else {
4490     isrowa = *row;
4491   }
4492   if (!col) {
4493     start = A->cmap->rstart;
4494     cmap  = a->garray;
4495     nzA   = a->A->cmap->n;
4496     nzB   = a->B->cmap->n;
4497     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4498     ncols = 0;
4499     for (i=0; i<nzB; i++) {
4500       if (cmap[i] < start) idx[ncols++] = cmap[i];
4501       else break;
4502     }
4503     imark = i;
4504     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4505     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4506     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4507   } else {
4508     iscola = *col;
4509   }
4510   if (scall != MAT_INITIAL_MATRIX) {
4511     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4512     aloc[0] = *A_loc;
4513   }
4514   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4515   *A_loc = aloc[0];
4516   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4517   if (!row) {
4518     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4519   }
4520   if (!col) {
4521     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4522   }
4523   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatGetBrowsOfAcols"
4529 /*@C
4530     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4531 
4532     Collective on Mat
4533 
4534    Input Parameters:
4535 +    A,B - the matrices in mpiaij format
4536 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4537 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4538 
4539    Output Parameters:
4540 +    rowb, colb - index sets of rows and columns of B to extract
4541 -    B_seq - the sequential matrix generated
4542 
4543     Level: developer
4544 
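   Example usage (a minimal sketch; the index sets created by the first call are passed back in for the
   MAT_REUSE_MATRIX call after the numerical values of B have changed):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve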
4545 @*/
4546 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4547 {
4548   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4549   PetscErrorCode ierr;
4550   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4551   IS             isrowb,iscolb;
4552   Mat            *bseq=NULL;
4553 
4554   PetscFunctionBegin;
4555   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4556     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4557   }
4558   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4559 
4560   if (scall == MAT_INITIAL_MATRIX) {
4561     start = A->cmap->rstart;
4562     cmap  = a->garray;
4563     nzA   = a->A->cmap->n;
4564     nzB   = a->B->cmap->n;
4565     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4566     ncols = 0;
4567     for (i=0; i<nzB; i++) {  /* row < local row index */
4568       if (cmap[i] < start) idx[ncols++] = cmap[i];
4569       else break;
4570     }
4571     imark = i;
4572     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4573     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4574     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4575     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4576   } else {
4577     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4578     isrowb  = *rowb; iscolb = *colb;
4579     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4580     bseq[0] = *B_seq;
4581   }
4582   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4583   *B_seq = bseq[0];
4584   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4585   if (!rowb) {
4586     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4587   } else {
4588     *rowb = isrowb;
4589   }
4590   if (!colb) {
4591     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4592   } else {
4593     *colb = iscolb;
4594   }
4595   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 #undef __FUNCT__
4600 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4601 /*
4602     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4603     of the OFF-DIAGONAL portion of local A
4604 
4605     Collective on Mat
4606 
4607    Input Parameters:
4608 +    A,B - the matrices in mpiaij format
4609 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4610 
4611    Output Parameters:
4612 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4613 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4614 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4615 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4616 
4617     Level: developer
4618 
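   Example usage (a minimal sketch of an internal caller; the buffers obtained with MAT_INITIAL_MATRIX are
   handed back in for a MAT_REUSE_MATRIX call after the numerical values of B change, and are eventually
   released by the caller with PetscFree2() and PetscFree()):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
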
4619 */
4620 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4621 {
4622   VecScatter_MPI_General *gen_to,*gen_from;
4623   PetscErrorCode         ierr;
4624   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4625   Mat_SeqAIJ             *b_oth;
4626   VecScatter             ctx =a->Mvctx;
4627   MPI_Comm               comm;
4628   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4629   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4630   PetscScalar            *rvalues,*svalues;
4631   MatScalar              *b_otha,*bufa,*bufA;
4632   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4633   MPI_Request            *rwaits = NULL,*swaits = NULL;
4634   MPI_Status             *sstatus,rstatus;
4635   PetscMPIInt            jj,size;
4636   PetscInt               *cols,sbs,rbs;
4637   PetscScalar            *vals;
4638 
4639   PetscFunctionBegin;
4640   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4641   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4642 
4643   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4644     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4645   }
4646   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4647   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4648 
4649   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4650   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4651   rvalues  = gen_from->values; /* holds the length of receiving row */
4652   svalues  = gen_to->values;   /* holds the length of sending row */
4653   nrecvs   = gen_from->n;
4654   nsends   = gen_to->n;
4655 
4656   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4657   srow    = gen_to->indices;    /* local row index to be sent */
4658   sstarts = gen_to->starts;
4659   sprocs  = gen_to->procs;
4660   sstatus = gen_to->sstatus;
4661   sbs     = gen_to->bs;
4662   rstarts = gen_from->starts;
4663   rprocs  = gen_from->procs;
4664   rbs     = gen_from->bs;
4665 
4666   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4667   if (scall == MAT_INITIAL_MATRIX) {
4668     /* i-array */
4669     /*---------*/
4670     /*  post receives */
4671     for (i=0; i<nrecvs; i++) {
4672       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4673       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4674       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4675     }
4676 
4677     /* pack the outgoing message */
4678     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4679 
4680     sstartsj[0] = 0;
4681     rstartsj[0] = 0;
4682     len         = 0; /* total length of j or a array to be sent */
4683     k           = 0;
4684     for (i=0; i<nsends; i++) {
4685       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4686       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4687       for (j=0; j<nrows; j++) {
4688         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4689         for (l=0; l<sbs; l++) {
4690           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4691 
4692           rowlen[j*sbs+l] = ncols;
4693 
4694           len += ncols;
4695           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4696         }
4697         k++;
4698       }
4699       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4700 
4701       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4702     }
4703     /* recvs and sends of i-array are completed */
4704     i = nrecvs;
4705     while (i--) {
4706       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4707     }
4708     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4709 
4710     /* allocate buffers for sending j and a arrays */
4711     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4712     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4713 
4714     /* create i-array of B_oth */
4715     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4716 
4717     b_othi[0] = 0;
4718     len       = 0; /* total length of j or a array to be received */
4719     k         = 0;
4720     for (i=0; i<nrecvs; i++) {
4721       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4722       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4723       for (j=0; j<nrows; j++) {
4724         b_othi[k+1] = b_othi[k] + rowlen[j];
4725         len        += rowlen[j]; k++;
4726       }
4727       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4728     }
4729 
4730     /* allocate space for j and a arrays of B_oth */
4731     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4732     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4733 
4734     /* j-array */
4735     /*---------*/
4736     /*  post receives of j-array */
4737     for (i=0; i<nrecvs; i++) {
4738       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4739       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4740     }
4741 
4742     /* pack the outgoing message j-array */
4743     k = 0;
4744     for (i=0; i<nsends; i++) {
4745       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4746       bufJ  = bufj+sstartsj[i];
4747       for (j=0; j<nrows; j++) {
4748         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4749         for (ll=0; ll<sbs; ll++) {
4750           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4751           for (l=0; l<ncols; l++) {
4752             *bufJ++ = cols[l];
4753           }
4754           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4755         }
4756       }
4757       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4758     }
4759 
4760     /* recvs and sends of j-array are completed */
4761     i = nrecvs;
4762     while (i--) {
4763       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4764     }
4765     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4766   } else if (scall == MAT_REUSE_MATRIX) {
4767     sstartsj = *startsj_s;
4768     rstartsj = *startsj_r;
4769     bufa     = *bufa_ptr;
4770     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4771     b_otha   = b_oth->a;
4772   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value");
4773 
4774   /* a-array */
4775   /*---------*/
4776   /*  post receives of a-array */
4777   for (i=0; i<nrecvs; i++) {
4778     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4779     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4780   }
4781 
4782   /* pack the outgoing message a-array */
4783   k = 0;
4784   for (i=0; i<nsends; i++) {
4785     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4786     bufA  = bufa+sstartsj[i];
4787     for (j=0; j<nrows; j++) {
4788       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4789       for (ll=0; ll<sbs; ll++) {
4790         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4791         for (l=0; l<ncols; l++) {
4792           *bufA++ = vals[l];
4793         }
4794         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4795       }
4796     }
4797     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4798   }
4799   /* recvs and sends of a-array are completed */
4800   i = nrecvs;
4801   while (i--) {
4802     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4803   }
4804   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4805   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4806 
4807   if (scall == MAT_INITIAL_MATRIX) {
4808     /* put together the new matrix */
4809     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4810 
4811     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4812     /* Since these are PETSc arrays, change flags to free them as necessary. */
4813     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4814     b_oth->free_a  = PETSC_TRUE;
4815     b_oth->free_ij = PETSC_TRUE;
4816     b_oth->nonew   = 0;
4817 
4818     ierr = PetscFree(bufj);CHKERRQ(ierr);
4819     if (!startsj_s || !bufa_ptr) {
4820       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4821       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller, so free it here */
4822     } else {
4823       *startsj_s = sstartsj;
4824       *startsj_r = rstartsj;
4825       *bufa_ptr  = bufa;
4826     }
4827   }
4828   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4829   PetscFunctionReturn(0);
4830 }
4831 
4832 #undef __FUNCT__
4833 #define __FUNCT__ "MatGetCommunicationStructs"
4834 /*@C
4835   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4836 
4837   Not Collective
4838 
4839   Input Parameters:
4840 . A - The matrix in mpiaij format
4841 
4842   Output Parameters:
4843 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4844 . colmap - A map from global column index to local index into lvec
4845 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4846 
4847   Level: developer
4848 
4849 @*/
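  Example usage (a minimal sketch; A is an assembled MATMPIAIJ matrix and the type of colmap follows the
  configuration-dependent signature above):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve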
4850 #if defined(PETSC_USE_CTABLE)
4851 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4852 #else
4853 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4854 #endif
4855 {
4856   Mat_MPIAIJ *a;
4857 
4858   PetscFunctionBegin;
4859   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4860   PetscValidPointer(lvec, 2);
4861   PetscValidPointer(colmap, 3);
4862   PetscValidPointer(multScatter, 4);
4863   a = (Mat_MPIAIJ*) A->data;
4864   if (lvec) *lvec = a->lvec;
4865   if (colmap) *colmap = a->colmap;
4866   if (multScatter) *multScatter = a->Mvctx;
4867   PetscFunctionReturn(0);
4868 }
4869 
4870 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4871 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4872 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4873 #if defined(PETSC_HAVE_ELEMENTAL)
4874 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4875 #endif
4876 
4877 #undef __FUNCT__
4878 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4879 /*
4880     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4881 
4882                n                       p                          p
4883         (              )       (              )         (                  )
4884       m (      A       )  *  n (       B      )   =   m (         C        )
4885         (              )       (              )         (                  )
4886 
4887 */
4888 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4889 {
4890   PetscErrorCode ierr;
4891   Mat            At,Bt,Ct;
4892 
4893   PetscFunctionBegin;
4894   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4895   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4896   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4897   ierr = MatDestroy(&At);CHKERRQ(ierr);
4898   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4899   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4900   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4901   PetscFunctionReturn(0);
4902 }
4903 
4904 #undef __FUNCT__
4905 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4906 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4907 {
4908   PetscErrorCode ierr;
4909   PetscInt       m=A->rmap->n,n=B->cmap->n;
4910   Mat            Cmat;
4911 
4912   PetscFunctionBegin;
4913   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4914   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4915   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4916   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4917   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4918   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4919   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4920   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4921 
4922   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4923 
4924   *C = Cmat;
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 /* ----------------------------------------------------------------*/
4929 #undef __FUNCT__
4930 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4931 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4932 {
4933   PetscErrorCode ierr;
4934 
4935   PetscFunctionBegin;
4936   if (scall == MAT_INITIAL_MATRIX) {
4937     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4938     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4939     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4940   }
4941   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4942   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4943   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4944   PetscFunctionReturn(0);
4945 }
4946 
4947 /*MC
4948    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4949 
4950    Options Database Keys:
4951 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4952 
4953   Level: beginner
4954 
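  Example usage (a minimal sketch; m, n, d_nz, and o_nz are placeholders supplied by the caller):
.vb
    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
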
4955 .seealso: MatCreateAIJ()
4956 M*/
4957 
4958 #undef __FUNCT__
4959 #define __FUNCT__ "MatCreate_MPIAIJ"
4960 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4961 {
4962   Mat_MPIAIJ     *b;
4963   PetscErrorCode ierr;
4964   PetscMPIInt    size;
4965 
4966   PetscFunctionBegin;
4967   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4968 
4969   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4970   B->data       = (void*)b;
4971   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4972   B->assembled  = PETSC_FALSE;
4973   B->insertmode = NOT_SET_VALUES;
4974   b->size       = size;
4975 
4976   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4977 
4978   /* build cache for off array entries formed */
4979   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4980 
4981   b->donotstash  = PETSC_FALSE;
4982   b->colmap      = 0;
4983   b->garray      = 0;
4984   b->roworiented = PETSC_TRUE;
4985 
4986   /* stuff used for matrix vector multiply */
4987   b->lvec  = NULL;
4988   b->Mvctx = NULL;
4989 
4990   /* stuff for MatGetRow() */
4991   b->rowindices   = 0;
4992   b->rowvalues    = 0;
4993   b->getrowactive = PETSC_FALSE;
4994 
4995   /* flexible pointer used in CUSP/CUSPARSE classes */
4996   b->spptr = NULL;
4997 
4998   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4999   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5000   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5001   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5008 #if defined(PETSC_HAVE_ELEMENTAL)
5009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5010 #endif
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 #undef __FUNCT__
5019 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5020 /*@C
5021      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5022          and "off-diagonal" part of the matrix in CSR format.
5023 
5024    Collective on MPI_Comm
5025 
5026    Input Parameters:
5027 +  comm - MPI communicator
5028 .  m - number of local rows (Cannot be PETSC_DECIDE)
5029 .  n - number of local columns; this should be the same as the local size used in creating the
5030        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5031        calculated if N is given). For square matrices n is almost always m.
5032 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5033 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5034 .   i - row indices for "diagonal" portion of matrix
5035 .   j - column indices
5036 .   a - matrix values
5037 .   oi - row indices for "off-diagonal" portion of matrix
5038 .   oj - column indices
5039 -   oa - matrix values
5040 
5041    Output Parameter:
5042 .   mat - the matrix
5043 
5044    Level: advanced
5045 
5046    Notes:
5047        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5048        must free the arrays once the matrix has been destroyed and not before.
5049 
5050        The i and j indices are 0 based
5051 
5052        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5053 
5054        This sets local rows and cannot be used to set off-processor values.
5055 
5056        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5057        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5058        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5059        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5060        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5061        communication if it is known that only local entries will be set.
5062 
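   Example usage (a minimal sketch with hypothetical data: a 2x2 matrix on two processes, each owning one
   row and one column; rank comes from MPI_Comm_rank() on comm, and the arrays must stay valid until the
   matrix is destroyed since they are not copied):
.vb
     PetscInt    i[2] = {0,1},j[1] = {0},oi[2] = {0,1},oj[1];
     PetscScalar a[1] = {2.0},oa[1] = {-1.0};
     Mat         A;

     oj[0] = (rank == 0) ? 1 : 0;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
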
5063 .keywords: matrix, aij, compressed row, sparse, parallel
5064 
5065 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5066           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5067 @*/
5068 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5069 {
5070   PetscErrorCode ierr;
5071   Mat_MPIAIJ     *maij;
5072 
5073   PetscFunctionBegin;
5074   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5075   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5076   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5077   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5078   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5079   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5080   maij = (Mat_MPIAIJ*) (*mat)->data;
5081 
5082   (*mat)->preallocated = PETSC_TRUE;
5083 
5084   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5085   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5086 
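  /* Wrap the user arrays directly (no copy): A is the local "diagonal" block with n local columns, while the
     "off-diagonal" block B is created with the full global column count, so oj holds global column indices
     until the parallel assembly below compacts B's column space */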
5087   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5088   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5089 
5090   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5091   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5092   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5093   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5094 
5095   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5096   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5097   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5098   PetscFunctionReturn(0);
5099 }
5100 
5101 /*
5102     Special version of MatSetValues() for MPIAIJ matrices, for direct calls from Fortran
5103 */
5104 #include <petsc/private/fortranimpl.h>
5105 
5106 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5107 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5108 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5109 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5110 #endif
5111 
5112 /* Change these macros so they can be used in a void function */
5113 #undef CHKERRQ
5114 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5115 #undef SETERRQ2
5116 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5117 #undef SETERRQ3
5118 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5119 #undef SETERRQ
5120 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5121 
5122 #undef __FUNCT__
5123 #define __FUNCT__ "matsetvaluesmpiaij_"
5124 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5125 {
5126   Mat            mat  = *mmat;
5127   PetscInt       m    = *mm, n = *mn;
5128   InsertMode     addv = *maddv;
5129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5130   PetscScalar    value;
5131   PetscErrorCode ierr;
5132 
5133   MatCheckPreallocated(mat,1);
5134   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5135 
5136 #if defined(PETSC_USE_DEBUG)
5137   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5138 #endif
5139   {
5140     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5141     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5142     PetscBool roworiented = aij->roworiented;
5143 
5144     /* Variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5145     Mat        A                 = aij->A;
5146     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5147     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5148     MatScalar  *aa               = a->a;
5149     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5150     Mat        B                 = aij->B;
5151     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5152     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5153     MatScalar  *ba               = b->a;
5154 
5155     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5156     PetscInt  nonew = a->nonew;
5157     MatScalar *ap1,*ap2;
5158 
5159     PetscFunctionBegin;
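    /* Mirrors MatSetValues_MPIAIJ(): entries in locally owned rows go directly into the "diagonal" block A or the
       "off-diagonal" block B; entries in rows owned by other processes are stashed (unless donotstash is set) and
       communicated during MatAssemblyBegin()/MatAssemblyEnd() */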
5160     for (i=0; i<m; i++) {
5161       if (im[i] < 0) continue;
5162 #if defined(PETSC_USE_DEBUG)
5163       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5164 #endif
5165       if (im[i] >= rstart && im[i] < rend) {
5166         row      = im[i] - rstart;
5167         lastcol1 = -1;
5168         rp1      = aj + ai[row];
5169         ap1      = aa + ai[row];
5170         rmax1    = aimax[row];
5171         nrow1    = ailen[row];
5172         low1     = 0;
5173         high1    = nrow1;
5174         lastcol2 = -1;
5175         rp2      = bj + bi[row];
5176         ap2      = ba + bi[row];
5177         rmax2    = bimax[row];
5178         nrow2    = bilen[row];
5179         low2     = 0;
5180         high2    = nrow2;
5181 
5182         for (j=0; j<n; j++) {
5183           if (roworiented) value = v[i*n+j];
5184           else value = v[i+j*m];
5185           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
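          /* columns in [cstart,cend) belong to the locally owned "diagonal" block A */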
5186           if (in[j] >= cstart && in[j] < cend) {
5187             col = in[j] - cstart;
5188             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5189           } else if (in[j] < 0) continue;
5190 #if defined(PETSC_USE_DEBUG)
5191           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5192 #endif
5193           else {
5194             if (mat->was_assembled) {
5195               if (!aij->colmap) {
5196                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5197               }
5198 #if defined(PETSC_USE_CTABLE)
5199               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5200               col--;
5201 #else
5202               col = aij->colmap[in[j]] - 1;
5203 #endif
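              /* col < 0 means this global column has no slot in the compacted off-diagonal block; unless new
                 nonzeros are forbidden, disassemble B so that it again uses global column indices */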
5204               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5205                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5206                 col  =  in[j];
5207                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5208                 B     = aij->B;
5209                 b     = (Mat_SeqAIJ*)B->data;
5210                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5211                 ba    = b->a;    /* must be refreshed before computing ap2: MatDisAssemble_MPIAIJ() replaced B and its value array */
5212                 rp2   = bj + bi[row];
5213                 ap2   = ba + bi[row];
5214                 rmax2 = bimax[row];
5215                 nrow2 = bilen[row];
5216                 low2  = 0;
5217                 high2 = nrow2;
5218                 bm    = aij->B->rmap->n;
5219               }
5220             } else col = in[j];
5221             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5222           }
5223         }
5224       } else if (!aij->donotstash) {
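        /* row im[i] is owned by another process: stash the values; they are sent during MatAssemblyBegin/End() */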
5225         if (roworiented) {
5226           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5227         } else {
5228           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5229         }
5230       }
5231     }
5232   }
5233   PetscFunctionReturnVoid();
5234 }
5235 
5236